Exemplo n.º 1
0
 def stop_job(self, job_wrapper):
     """Attempt to delete a job from the DRM queue.

     The job is killed through the DRMAA session (``self.ds``) unless the
     destination configuration defines ``drmaa_external_killjob_script``;
     in that case the script is executed with the external job id and
     ``self.userid`` appended as arguments.

     Every failure is logged and swallowed, so this method never raises.

     :param job_wrapper: object providing ``get_job()`` and
         ``get_destination_configuration()`` for the job to stop.
     """
     job = job_wrapper.get_job()
     # Bind ext_id before the ``try`` so the handlers below can always format
     # their log message, even when fetching the id is what failed.
     ext_id = None
     try:
         ext_id = job.get_job_runner_external_id()
         # Raised explicitly (instead of ``assert``) so the check survives
         # ``python -O``; the exception type seen by the handler is unchanged.
         if ext_id in (None, 'None'):
             raise AssertionError('External job id is None')
         kill_script = job_wrapper.get_destination_configuration(
             "drmaa_external_killjob_script")
         if kill_script is None:
             self.ds.kill(ext_id)
         else:
             cmd = shlex.split(kill_script)
             cmd.extend([str(ext_id), str(self.userid)])
             commands.execute(cmd)
         log.info("(%s/%s) Removed from DRM queue at user's request" %
                  (job.id, ext_id))
     except drmaa.InvalidJobException:
         log.exception(
             "(%s/%s) User killed running job, but it was already dead" %
             (job.id, ext_id))
     except commands.CommandLineException as e:
         log.error(
             "(%s/%s) User killed running job, but command execution failed: %s"
             % (job.id, ext_id, unicodify(e)))
     except Exception:
         log.exception(
             "(%s/%s) User killed running job, but error encountered removing from DRM queue"
             % (job.id, ext_id))
Exemplo n.º 2
0
 def _get_slurm_state():
     """Return the ``JobState`` that ``scontrol show job`` reports for ``ajs.job_id``.

     When ``scontrol`` rejects the id as invalid, the state is looked up with
     ``_get_slurm_state_with_sacct`` instead; ``'NOT_FOUND'`` is returned if
     that lookup yields nothing. Any other command failure is re-raised.
     """
     if '.' in ajs.job_id:
         # custom slurm-drmaa-with-cluster-support job id syntax
         job_id, cluster = ajs.job_id.split('.', 1)
         cluster_args = ['-M', cluster]
     else:
         job_id, cluster = ajs.job_id, None
         cluster_args = []
     cmd = ['scontrol', '-o'] + cluster_args + ['show', 'job', job_id]
     try:
         stdout = commands.execute(cmd).strip()
     except commands.CommandLineException as e:
         if e.stderr != 'slurm_load_jobs error: Invalid job id specified\n':
             raise e
         # The job may be old, try to get its state with sacct
         return _get_slurm_state_with_sacct(job_id, cluster) or 'NOT_FOUND'
     # stdout is a single line in format "key1=value1 key2=value2 ..."
     keys = []
     values = []
     for token in stdout.split():
         if '=' in token:
             # Split on the first `=` only: a value may itself contain `=`
             # (e.g. `StdIn=StdIn=/dev/null`)
             key, value = token.split('=', 1)
             keys.append(key)
             values.append(value)
         else:
             # A token without `=` continues the previous value, which
             # contained spaces (e.g. `Comment=** time_limit (60m) min_nodes (1) **`)
             values[-1] += f" {token}"
     return dict(zip(keys, values))['JobState']
Exemplo n.º 3
0
 def _chown(path):
     """Change ownership of ``path`` by running the configured external chown script.

     The command is the shell-split ``external_chown_script`` setting followed
     by ``path`` and fields 0 and 3 of the user's system pwent
     (presumably user name and group id, as in a ``pwd`` entry - confirm
     against ``system_user_pwent``).

     This is best effort: failures are logged and never raised.

     :param path: path of the file whose ownership should change.
     """
     try:
         # get username from email/username
         pwent = trans.user.system_user_pwent(
             trans.app.config.real_system_username)
         cmd = shlex.split(trans.app.config.external_chown_script)
         cmd.extend([path, pwent[0], str(pwent[3])])
     except Exception as e:
         log.debug('Failed to construct command to change ownership %s' %
                   unicodify(e))
         # Nothing to execute: continuing would reference an unbound ``cmd``.
         return
     log.debug('Changing ownership of %s with: %s' % (path, ' '.join(cmd)))
     try:
         commands.execute(cmd)
     except commands.CommandLineException as e:
         log.warning('Changing ownership of uploaded file %s failed: %s',
                     path, unicodify(e))
Exemplo n.º 4
0
 def conda_info(self):
     """Return the decoded output of ``<conda_exec> info --json``.

     Returns ``None`` when ``self.conda_exec`` is ``None``.
     """
     if self.conda_exec is None:
         return None
     raw_info = commands.execute([self.conda_exec, "info", "--json"])
     return json.loads(unicodify(raw_info))
Exemplo n.º 5
0
def best_search_result(conda_target,
                       conda_context,
                       channels_override=None,
                       offline=False,
                       platform=None):
    """Find the best "conda search" result for the specified target.

    Returns a ``(hit, exact)`` tuple, where ``exact`` tells whether ``hit``
    satisfied ``is_search_hit_exact``. Without an exact hit the top-ranked
    hit is returned with ``exact=False``. ``(None, None)`` is returned when
    the search produced no hits or the command could not be executed.
    """
    conda_exec = conda_context.conda_exec
    # CondaInDockerContext provides the executable as a list of arguments
    search_cmd = list(conda_exec) if isinstance(conda_exec, list) else [conda_exec]
    search_cmd += ["search", "--full-name", "--json"]
    if offline:
        search_cmd.append("--offline")
    if platform:
        search_cmd += ['--platform', platform]
    if channels_override:
        search_cmd.append("--override-channels")
        for channel in channels_override:
            search_cmd += ["--channel", channel]
    else:
        search_cmd += conda_context._override_channels_args
    search_cmd.append(conda_target.package)

    try:
        raw_output = unicodify(commands.execute(search_cmd))
        # Start from the reversed search output, then rely on the stability
        # of Python's sort: ordering by build_number first and by version
        # second leaves the newest version with the newest build on top.
        reversed_hits = json.loads(raw_output).get(conda_target.package, [])[::-1]
        by_build = sorted(reversed_hits,
                          key=lambda hit: hit['build_number'],
                          reverse=True)
        hits = sorted(by_build,
                      key=lambda hit: packaging.version.parse(hit['version']),
                      reverse=True)
    except commands.CommandLineException:
        log.error("Could not execute: '%s'", search_cmd)
        hits = []

    if not hits:
        return (None, None)

    # The first exact match wins; otherwise fall back to the top-ranked hit.
    for hit in hits:
        if is_search_hit_exact(conda_target, hit):
            return (hit, True)
    return (hits[0], False)
Exemplo n.º 6
0
def count_lines(filename, non_empty=False):
    """
    Count the lines of the 'filename' file with an external tool.

    With ``non_empty`` set, ``grep`` counts the lines that are not
    whitespace-only; otherwise ``wc -l`` counts all lines. If the command
    fails the error is logged and 0 is returned.
    """
    cmd = ['grep', '-cve', r'^\s*$', filename] if non_empty else ['wc', '-l', filename]
    try:
        out = commands.execute(cmd)
    except commands.CommandLineException as e:
        log.error(unicodify(e))
        return 0
    # The count is the first whitespace-separated field of the output.
    count_field = out.split()[0]
    return int(count_field)
Exemplo n.º 7
0
def count_special_lines(word, filename, invert=False):
    """
    Count the lines of 'filename' matching the extended regex 'word'.

    The counting is delegated to ``grep -c -E`` for speed; with ``invert``
    set, ``-v`` is added so the non-matching lines are counted instead.
    Returns the number of hits, or 0 if the command fails.
    """
    options = ["-c", "-E", "-v"] if invert else ["-c", "-E"]
    cmd = ["grep", *options, word, filename]
    try:
        out = commands.execute(cmd)
    except commands.CommandLineException:
        return 0
    return int(out)
Exemplo n.º 8
0
def condor_submit(submit_file):
    """
    Submit the condor job described by ``submit_file``.

    Returns an ``(external_id, message)`` tuple. On success ``external_id``
    is the id parsed from the command output and ``message`` is that output.
    On failure ``external_id`` is ``None`` and ``message`` gives the reason:
    the command error, or ``PROBLEM_PARSING_EXTERNAL_ID`` when the output
    could not be parsed.
    """
    try:
        message = commands.execute(('condor_submit', submit_file))
    except commands.CommandLineException as e:
        return None, unicodify(e)
    try:
        return parse_external_id(message, type='condor'), message
    except Exception:
        return None, PROBLEM_PARSING_EXTERNAL_ID
Exemplo n.º 9
0
 def _get_slurm_state_with_sacct(job_id, cluster):
     """Look up the state of a job with ``sacct``.

     :param job_id: job id passed to ``sacct -j``.
     :param cluster: cluster name passed to ``-M``; skipped when falsy.
     :returns: the first word of the state reported for the job, or ``None``
         when accounting storage is disabled or ``sacct`` printed no state.
     :raises commands.CommandLineException: for any other ``sacct`` failure.
     """
     cmd = ['sacct', '-n', '-o', 'state%-32']
     if cluster:
         cmd.extend(['-M', cluster])
     cmd.extend(['-j', job_id])
     try:
         stdout = commands.execute(cmd)
     except commands.CommandLineException as e:
         if e.stderr.strip() == 'SLURM accounting storage is disabled':
             log.warning('SLURM accounting storage is not properly configured, unable to run sacct')
             return None
         raise
     # First line is for 'job_id'
     # Second line is for 'job_id.batch' (only available after the batch job is complete)
     # Following lines are for the steps 'job_id.0', 'job_id.1', ... (but Galaxy does not use steps)
     lines = stdout.splitlines()
     if not lines:
         # No output at all: report "no state" instead of failing with an
         # IndexError, so the caller can fall back to its own default.
         return None
     # Strip whitespaces and the final '+' (if present), only return the first word
     words = lines[0].strip().rstrip('+').split()
     return words[0] if words else None
Exemplo n.º 10
0
 def external_runjob(self, external_runjob_script, jobtemplate_filename, username):
     """Run an external script that will QSUB a new job.

     The external script needs to be run with sudo, and will setuid() to the specified user.
     Effectively, will QSUB as a different user (than the one used by Galaxy).

     :param external_runjob_script: command line of the script; it is
         shell-split and ``username`` and ``jobtemplate_filename`` are
         appended as arguments.
     :param jobtemplate_filename: job template file handed to the script.
     :param username: user to submit the job as.
     :returns: the stripped script output (expected to be the DRMAA job id),
         or ``None`` if the script failed or printed nothing.
     """
     cmd = shlex.split(external_runjob_script)
     cmd.extend([str(username), jobtemplate_filename])
     log.info(f"Running command: {' '.join(cmd)}")
     try:
         stdoutdata = commands.execute(cmd).strip()
     except commands.CommandLineException:
         log.exception("External_runjob failed")
         return None
     # The expected output is a single line containing a single numeric value:
     # the DRMAA job-ID.
     if not stdoutdata:
         # log.error rather than log.exception: no exception is being handled
         # here, so there is no traceback to attach.
         log.error("External_runjob returned nothing instead of the job id")
         return None
     return stdoutdata
Exemplo n.º 11
0
def __unused_port_on_range(range):
    """Pick a random port in ``[range[0], range[1])`` that netstat does not list as used.

    :param range: two-item sequence ``(lower, upper)``; both must be truthy.
        ``upper`` is exclusive, as in ``random.randrange``.
    :returns: a port number not found among the local TCP ports reported by
        netstat.
    :raises RuntimeError: if every port of the range is occupied.
    """
    assert range[0] and range[1]
    lower, upper = range[0], range[1]

    # Find all ports that are already occupied
    # NOTE(review): the option string is passed without a leading dash
    # ("tuln", not "-tuln") - confirm the installed netstat accepts this.
    cmd_netstat = ["netstat", "tuln"]
    stdout = commands.execute(cmd_netstat)

    occupied_ports = set()
    for line in stdout.splitlines():
        # 'tcp' is also a prefix of 'tcp6', so one check covers both.
        if not line.startswith('tcp'):
            continue
        col = line.split()
        if len(col) < 4:
            continue
        # The port is whatever follows the LAST colon; IPv6 local addresses
        # such as ':::22' or '::1:631' contain several colons.
        local_port = col[3].rpartition(':')[2]
        if local_port.isdigit():
            occupied_ports.add(int(local_port))

    # Bail out instead of looping forever when no port of the range is free.
    occupied_in_range = sum(1 for port in occupied_ports if lower <= port < upper)
    if upper > lower and occupied_in_range >= upper - lower:
        raise RuntimeError(
            f"No unused port available in range {lower}-{upper}")

    # Generate random free port number.
    while True:
        port = random.randrange(lower, upper)
        if port not in occupied_ports:
            break
    return port
Exemplo n.º 12
0
    def _get_drmaa_state_qstat(self, job_id, extinfo):
        """
        Get a (drmaa) job state with qstat.

        qstat only returns information for jobs that are queued, suspended,
        ..., or just finished (i.e. jobs that are still in the system).
        Information on finished jobs can only be found with qacct. Hence, if
        the qstat output does not contain the job, the state is reported as
        UNDETERMINED.

        :param job_id: the job id to look for in the first qstat column.
        :param extinfo: a set that additional information can be stored in,
            i.e., "deleted"; it is handed to ``_map_qstat_drmaa_states``.
        :returns: the drmaa state.
        :raises drmaa.InternalException: if the qstat command fails.
        """
        # log.debug("UnivaJobRunner._get_drmaa_state_qstat ({jobid})".format(jobid=job_id))
        # using -u "*" is the simplest way to query the jobs of all users, which
        # covers the case where jobs are submitted as the real user. It would
        # be more efficient to specify the user (or, in case the galaxy user
        # submits the job, to omit the -u option)

        # note that this is preferred over using `qstat -j JOBID`, which returns a non-zero
        # exit code in case of error as well as if the jobid is not found (if job is finished).
        # even if this could be disambiguated by the stderr message the `qstat -u "*"`
        # way seems more generic

        # NOTE(review): the command is given as an argument list, so the double
        # quotes are presumably passed to qstat literally - confirm that
        # '"*"' (rather than '*') matches all users as intended.
        cmd = ['qstat', '-u', '"*"']
        try:
            stdout = commands.execute(cmd).strip()
        except commands.CommandLineException as e:
            log.error(unicodify(e))
            raise self.drmaa.InternalException()
        state = self.drmaa.JobState.UNDETERMINED
        for line in stdout.split('\n'):
            line = line.split()
            # qstat rows start with: job-ID prior name user state ...
            # so the state is the fifth field (index 4), which is exactly what
            # the length check guarantees to exist.
            if len(line) >= 5 and line[0] == str(job_id):
                state = self._map_qstat_drmaa_states(job_id, line[4], extinfo)
                break
        # log.debug("UnivaJobRunner._get_drmaa_state_qstat ({jobid}) -> {state}".format(jobid=job_id, state=self.drmaa_job_state_strings[state]))
        return state
Exemplo n.º 13
0
def dockerfile_build(path, dockerfile=None, error=log.error, **kwds):
    """Build or pull the docker image required by the tools found under ``path``.

    The docker container identifiers of all tools under ``path`` are
    collected. If a Dockerfile is located by ``__find_dockerfile`` the image
    is built from it, otherwise it is pulled. When ``docker_image_cache`` is
    given in ``kwds`` the image is additionally saved to the cache.

    :param path: location searched for tool definitions.
    :param dockerfile: optional Dockerfile hint passed to ``__find_dockerfile``.
    :param error: callback used to report problems; called like a logging
        method (message plus format arguments).
    :param kwds: extra options; forwarded to ``docker_host_args`` and
        consulted for ``docker_image_cache``.
    """
    expected_container_names = set()
    tool_directories = set()
    for (tool_path, tool_xml) in load_tool_elements_from_path(path):
        requirements, containers = parse_requirements_from_xml(tool_xml)
        for container in containers:
            if container.type == "docker":
                expected_container_names.add(container.identifier)
                tool_directories.add(os.path.dirname(tool_path))
                break

    if len(expected_container_names) == 0:
        error("Could not find any docker identifiers to generate.")
        # ``error`` may simply log and return (the default does); without an
        # identifier there is nothing to build and ``pop()`` would raise.
        return

    if len(expected_container_names) > 1:
        # Reported, but processing continues with one arbitrary identifier
        # unless ``error`` raises.
        error(
            "Multiple different docker identifiers found for selected tools [%s]",
            expected_container_names)

    image_identifier = expected_container_names.pop()

    dockerfile = __find_dockerfile(dockerfile, tool_directories)
    if dockerfile is not None:
        docker_command_parts = docker_util.build_command(
            image_identifier, dockerfile, **docker_host_args(**kwds))
    else:
        docker_command_parts = docker_util.build_pull_command(
            image_identifier, **docker_host_args(**kwds))

    # Run the build/pull command exactly once.
    commands.execute(docker_command_parts)
    docker_image_cache = kwds.get('docker_image_cache')
    if docker_image_cache:
        destination = docker_cache_path(docker_image_cache, image_identifier)
        save_image_command_parts = docker_util.build_save_image_command(
            image_identifier, destination, **docker_host_args(**kwds))
        commands.execute(save_image_command_parts)
Exemplo n.º 14
0
    def _get_drmaa_state_qacct(self, job_id, extinfo):
        '''
        Get the job (drmaa) state with ``qacct -j JOBID``.

        job_id: id of the job to query.

        extinfo: dict that is filled with additional information:
        - time_wasted: time used in seconds (parsed from the wallclock field)
        - memory_wasted: memory used by the program in bytes (parsed from maxvmem)
        - slots: the slots field as an int
        - deleted: set to True if the job was deleted (otherwise not set at all)
        - exit_status: set to the exit status if it is larger than 0 and less
            than 129
        - signal: name of the signal, set for exit states > 128

        The wallclock, maxvmem and slots fields are read unconditionally, so a
        KeyError is raised if qacct does not report them.

        Returns the state:
        - UNDETERMINED if qacct fails (after the retries for a job that is
          not found yet) or if none of the cases below applies
        - FAILED if the job was deleted (deleted_by is not NONE)
        - DONE if exit state < 1
        - FAILED if exit state > 0
        - RUNNING if failed in 24,25
        - FAILED if failed not in [0,24,25,100]
        The failed field is evaluated last and therefore overrides the state
        derived from exit_status.
        '''
        # log.debug("UnivaJobRunner._get_drmaa_state_qacct ({jobid})".format(jobid=job_id))
        # Map signal number -> signal name (SIG* entries of the signal module,
        # excluding SIG_* such as SIG_DFL). The names are traversed in reverse
        # sorted order, so when several names share a number the
        # alphabetically first one is stored last and wins.
        signals = {k: v for v, k in reversed(sorted(signal.__dict__.items()))
           if v.startswith('SIG') and not v.startswith('SIG_')}
        cmd = ['qacct', '-j', job_id]
        slp = 1
        # run qacct -j JOBID (since the accounting data for the job might not be
        # available immediately a simple retry mechanism is implemented:
        # sleep 1, 2, 4, ..., 32 seconds between attempts, i.e. the max wait
        # is approx 1min)
        while True:
            try:
                stdout = commands.execute(cmd).strip()
            except commands.CommandLineException as e:
                # only retry while the job is reported as not found
                if slp <= 32 and f"job id {job_id} not found" in e.stderr:
                    time.sleep(slp)
                    slp *= 2
                    continue
                else:
                    log.error(unicodify(e))
                    return self.drmaa.JobState.UNDETERMINED
            else:
                break
        # parse the output into a dict: the first word of each line is the
        # key, the remaining words (joined by single spaces) are the value
        qacct = dict()
        for line in stdout.split("\n"):
            # remove header
            if line.startswith("=") or line == "":
                continue
            line = line.split()
            qacct[line[0]] = " ".join(line[1:])
        # qacct has three fields of interest: failed, exit_status, deleted_by
        # experiments
        #            failed  exit_status deleted_by
        # BASH ------------------------------------
        # time-limit 100     137
        # mem-limit  0       2
        # python --------------------------------------------------------------
        # time-limit
        # mem-limit  0       1
        # C -------------------------------------------------------------------
        # time-limit
        # mem-limit  0       C programs either segfault (139) or check the allocated memory for NULL (then a programmer defined message/exit code is given)
        #                    note that max_vmem might not be reliable, since the program never gets the memory.
        # C++ -----------------------------------------------------------------
        # time-limit
        # mem-limit  0       same as for C programs
        # JAVA ----------------------------------------------------------------
        # time-limit
        # mem-limit
        # perl ----------------------------------------------------------------
        # time-limit
        # mem-limit
        # bash other tests ----------------------------------------------------
        # qdel       100     137          user@mail

        extinfo["time_wasted"] = _parse_time(qacct["wallclock"])
        extinfo["memory_wasted"] = size_to_bytes(qacct["maxvmem"])
        extinfo["slots"] = int(qacct["slots"])

        # deleted_by
        # If the job (the array task) has been deleted via qdel, "<username>@<hostname>", else
        # "NONE". If qdel was called multiple times, every invocation is recorded in a comma
        # separated list.
        if "deleted_by" in qacct and qacct["deleted_by"] != "NONE":
            log.info(f"DRMAAUniva: job {job_id} was aborted by {qacct['deleted_by']}")
            extinfo["deleted"] = True
            return self.drmaa.JobState.FAILED

        state = self.drmaa.JobState.UNDETERMINED
        # exit_status
        # Exit status of the job script (or Univa Grid Engine specific status in case of certain
        # error conditions). The exit status is determined by following the normal shell conventions
        # If the command terminates normally the value of the command is its exit status.
        # However, in the case that the command exits abnormally, a value of 0200 (octal), 128
        # (decimal) is added to the value of the command to make up the exit status.
        # For example: If a job dies through signal 9 (SIGKILL) then the exit status
        # becomes 128 + 9 = 137.
        if "exit_status" in qacct:
            qacct["exit_status"] = int(qacct["exit_status"])
            if qacct["exit_status"] < 1:
                # NOTE(review): this successful case is logged at error level - confirm intended
                log.error(f"DRMAAUniva: job {job_id} has exit status {qacct['exit_status']}")
                state = self.drmaa.JobState.DONE
            elif 0 < qacct["exit_status"] < 129:
                log.error(f"DRMAAUniva: job {job_id} has exit status {qacct['exit_status']}")
                extinfo['exit_status'] = qacct["exit_status"]
                state = self.drmaa.JobState.FAILED
            else:
                # exit status > 128: the job was terminated by signal (exit_status - 128)
                log.error(f"DRMAAUniva: job {job_id} was killed by signal {qacct['exit_status'] - 128}")
                state = self.drmaa.JobState.FAILED
                extinfo["signal"] = signals[qacct["exit_status"] - 128]

        # failed
        # Indicates the problem which occurred in case a job could not be started on the execution
        # host (e.g. because the owner of the job did not have a valid account on that
        # machine). If Univa Grid Engine tries to start a job multiple times, this may lead to
        # multiple entries in the accounting file corresponding to the same job ID.
        # for the codes see https://docs.oracle.com/cd/E19957-01/820-0699/chp11-2/index.html
        if "failed" in qacct:
            # only the leading number of the field is evaluated
            code = int(qacct["failed"].split()[0])
            # this happens in case of no error or exit_code!=0 (0) or a signal (100).
            # both cases are covered already
            if code in [0, 100]:
                pass
            # these seem to be OK as well
            elif code in [24, 25]:
                state = self.drmaa.JobState.RUNNING
            else:
                log.error(f"DRMAAUniva: job {job_id} failed with failure {qacct['failed']}")
                state = self.drmaa.JobState.FAILED
        # log.debug("UnivaJobRunner._get_drmaa_state_qacct ({jobid}) -> {state}".format(jobid=job_id, state=self.drmaa_job_state_strings[state]))
        return state
Exemplo n.º 15
0
    def authenticate(self, email, username, password, options):
        """Authenticate a user against PAM.

        The PAM user name is derived from the e-mail address (option
        ``login-use-email``) or from the user name (option
        ``login-use-username``). The credentials are then verified either by
        an external helper script run through ``sudo -n`` (option
        ``use-external-helper``) or in-process with the ``pam`` module.

        :param email: e-mail address supplied at login, or ``None``.
        :param username: user name supplied at login, or ``None``.
        :param password: password to verify.
        :param options: authenticator options; ``redact_username_in_logs``
            must be present, all other options are optional.
        :returns: ``(True, email, username)`` with the values to use for
            auto-registration on success, ``(False, '', '')`` if the
            credentials were rejected, and ``(None, '', '')`` if no PAM user
            name could be determined or the ``pam`` module is unavailable.
        """
        pam_username = None
        auto_register_username = None
        auto_register_email = None
        force_fail = False
        if not options['redact_username_in_logs']:
            log.debug(f"use username: {options.get('login-use-username')} use email {options.get('login-use-email', False)} email {email} username {username}")
        # check email based login first because if email exists in Galaxy DB
        # we will be given the "public name" as username
        if string_as_bool(options.get('login-use-email', False)) and email is not None:
            if '@' in email:
                # Split on the last '@' only, so an address containing more
                # than one '@' does not raise on unpacking.
                (email_user, email_domain) = email.rsplit('@', 1)
                pam_username = email_user
                if email_domain == options.get('maildomain', None):
                    auto_register_email = email
                    if username is not None:
                        auto_register_username = username
                    else:
                        auto_register_username = email_user
                else:
                    log.debug('PAM authenticate: warning: email does not match configured PAM maildomain')
                    # no need to fail: if auto-register is not enabled, this
                    # might still be a valid user
            else:
                log.debug('PAM authenticate: email must be used to login, but no valid email found')
                force_fail = True
        elif string_as_bool(options.get('login-use-username', False)):
            # if we get here via authenticate_user then
            # user will be "public name" and
            # email address will be as per registered user
            if username is not None:
                pam_username = username
                if email is not None:
                    auto_register_email = email
                elif options.get('maildomain', None) is not None:
                    # we can register a user with this username and mail domain
                    # if auto registration is enabled
                    auto_register_email = f"{username}@{options['maildomain']}"
                auto_register_username = username
            else:
                log.debug('PAM authenticate: username login selected but no username provided')
                force_fail = True
        else:
            log.debug('PAM authenticate: could not find username for PAM')
            force_fail = True

        if force_fail:
            return None, '', ''

        pam_service = options.get('pam-service', 'galaxy')
        use_helper = string_as_bool(options.get('use-external-helper', False))
        log.debug(f"PAM auth: will use external helper: {use_helper}")
        authenticated = False
        if use_helper:
            authentication_helper = options.get('authentication-helper-script', '/bin/false').strip()
            log.debug(f"PAM auth: external helper script: {authentication_helper}")
            if not authentication_helper.startswith('/'):
                # don't accept relative path
                authenticated = False
            elif any('\n' in str(field) for field in (pam_service, pam_username, password)):
                # The helper receives service, user name and password as
                # newline-separated lines on stdin; an embedded newline would
                # shift those fields, so such input is rejected outright.
                log.debug('PAM auth: refusing credentials containing a newline')
                authenticated = False
            else:
                auth_cmd = shlex.split(f'/usr/bin/sudo -n {authentication_helper}')
                log.debug(f"PAM auth: external helper cmd: {auth_cmd}")
                # The password is passed on stdin, not on the command line.
                message = f'{pam_service}\n{pam_username}\n{password}\n'
                try:
                    output = commands.execute(auth_cmd, input=message)
                except commands.CommandLineException as e:
                    if e.stderr != '':
                        log.debug(f"PAM auth: external authentication script had errors: status {e.returncode} error {e.stderr}")
                    output = e.stdout
                # The helper signals success by printing exactly 'True'.
                if output.strip() == 'True':
                    authenticated = True
                else:
                    authenticated = False
        else:
            try:
                import pam
            except ImportError:
                log.debug('PAM authenticate: could not load pam module, PAM authentication disabled')
                return None, '', ''

            p_auth = pam.pam()
            authenticated = p_auth.authenticate(pam_username, password, service=pam_service)

        if authenticated:
            log.debug(f"PAM authentication successful for {'redacted' if options['redact_username_in_logs'] else pam_username}")
            return True, auto_register_email, auto_register_username
        else:
            log.debug(f"PAM authentication failed for {'redacted' if options['redact_username_in_logs'] else pam_username}")
            return False, '', ''