Пример #1
0
def kill_job_ids(job_ids):
    """
    Kills qsub jobs by issuing the ``qdel`` command

    Parameters
    ----------
    job_ids: list
        a list of job ID numbers; entries may be strings or integers, each one is converted with ``str()`` before being placed on the command line

    Notes
    -----
    Builds a single command of the form ``qdel <id> <id> ...`` and runs it through ``tools.SubprocessCmd``. The stdout and stderr captured from the command are written to the debug log.

    If ``job_ids`` is empty or ``None``, no command is run and only a debug message is logged.

    Examples
    --------
    Example usage::

        import qsub
        job_ids = ['4104004', '4104006', '4104009']
        qsub.kill_job_ids(job_ids = job_ids)

    """
    # nothing to do; avoid running a bare 'qdel' with no arguments
    if not job_ids:
        logger.debug("No jobs passed")
        return

    logger.debug('Killing jobs: {0}'.format(job_ids))
    # str() so that integer job IDs do not make str.join raise TypeError
    qdel_command = 'qdel {0}'.format(' '.join(str(job_id) for job_id in job_ids))
    cmd = tools.SubprocessCmd(command = qdel_command).run()
    logger.debug(cmd.proc_stdout)
    logger.debug(cmd.proc_stderr)
Пример #2
0
def get_qacct(job_id):
    """
    Looks up the ``qacct`` record of a qsub job

    Parameters
    ----------
    job_id:
        the ID of the job to query; it is inserted into the command ``qacct -j <job_id>``

    Returns
    -------
    the ``proc_stdout`` attribute of the finished ``tools.SubprocessCmd`` object, i.e. the captured stdout of the ``qacct`` command
    """
    accounting_query = 'qacct -j {0}'.format(job_id)
    process = tools.SubprocessCmd(command = accounting_query)
    finished = process.run()
    return finished.proc_stdout
Пример #3
0
def vcf2annovar(vcf_file, **kwargs):
    """
    Converts a .vcf file to ANNOVAR .avinput format, using ANNOVAR ``convert2annovar.pl``

    Parameters
    ----------
    vcf_file: str
        the path to a .vcf file

    Keyword Arguments
    -----------------
    output_file: str
        the path to the ``.avinput`` file to be created, or ``None`` to use the path of ``vcf_file`` with its extension replaced by ``.avinput``
    bin_dir: str
        path to the ANNOVAR installation directory, or ``None`` to use the internally set default location (``configs['ANNOVAR_bin_dir']``)

    Notes
    -----
    Generates and executes a shell command in the format::

        "annovar/convert2annovar.pl" -format vcf4old "/data/output/169.duplications.vcf" -includeinfo > "/data/output/169.duplications.avinput"

    The input file, the ``convert2annovar.pl`` script, and (after the command has run) the output file are each passed to ``tools.missing_item_kill``.

    Returns
    -------
    str
        the path to the output ``.avinput`` file

    """
    # an explicit None means "use the default", the same as omitting the keyword;
    # the defaults are only computed when actually needed
    bin_dir = kwargs.pop('bin_dir', None)
    if bin_dir is None:
        bin_dir = configs['ANNOVAR_bin_dir']
    output_file = kwargs.pop('output_file', None)
    if output_file is None:
        output_file = os.path.splitext(vcf_file)[0] + '.avinput'

    # make sure input file exists
    tools.missing_item_kill(item = vcf_file, logger = logger)

    # path to binary to use
    convert_bin = os.path.join(bin_dir, 'convert2annovar.pl')
    tools.missing_item_kill(item = convert_bin, logger = logger)

    # shell command to run; stdout of the script is redirected into the output file
    convert_command = '''
"{0}" -format vcf4old "{1}" -includeinfo > "{2}"
    '''.format(
    convert_bin, # 0
    vcf_file, # 1
    output_file # 2
    )

    # run
    logger.debug(convert_command)
    run_cmd = tools.SubprocessCmd(command = convert_command).run()
    logger.debug(run_cmd.proc_stdout)
    logger.debug(run_cmd.proc_stderr)

    # make sure output file exists
    tools.missing_item_kill(item = output_file, logger = logger)

    return(output_file)
Пример #4
0
def kill_jobs(jobs):
    """
    Kills qsub jobs by issuing the ``qdel`` command

    Parameters
    ----------
    jobs: list
        a list of ``Job`` objects; the ``id`` attribute of each one is converted with ``str()`` and passed to ``qdel``

    Notes
    -----
    Builds a single command of the form ``qdel <id> <id> ...`` and runs it through ``tools.SubprocessCmd``. The stdout and stderr captured from the command are written to the debug log.

    If ``jobs`` is empty or ``None``, no command is run and only a debug message is logged.
    """
    # nothing to do; avoid running a bare 'qdel' with no arguments
    if not jobs:
        logger.debug("No jobs passed")
        return

    logger.debug('Killing jobs: {0}'.format(jobs))
    # str() so that non-string job ids do not make str.join raise TypeError
    qdel_command = 'qdel {0}'.format(' '.join(str(job.id) for job in jobs))
    cmd = tools.SubprocessCmd(command = qdel_command).run()
    logger.debug(cmd.proc_stdout)
    logger.debug(cmd.proc_stderr)
Пример #5
0
    def get_qacct(self, job_id = None):
        """
        Looks up the `qacct` entry for a completed qsub job, used to determine if the job completed successfully

        Parameters
        ----------
        job_id:
            the ID of the job to query; when omitted (or otherwise falsy) this object's own ``id`` attribute is used

        Notes
        -----
        This operation is extremely slow, takes about 10 - 30+ seconds to complete

        Returns
        -------
        str
            The character string representation of the stdout from the `qacct -j` command for the job
        """
        # fall back to this object's own ID when the caller did not supply one
        target_id = job_id if job_id else self.id
        accounting_query = 'qacct -j {0}'.format(target_id)
        process = tools.SubprocessCmd(command = accounting_query)
        finished = process.run()
        return finished.proc_stdout
Пример #6
0
def monitor_jobs(jobs = None, kill_err = True, print_verbose = False, **kwargs):
    """
    Monitors a list of qsub `Job` objects for completion. Job monitoring is accomplished by calling each job's `present()` and `error()` methods, then waiting for several seconds. Jobs that are no longer present in `qstat` or have an error state will be removed from the monitoring queue. The function will repeatedly check each job and then wait, removing absent or errored jobs, until no jobs remain in the monitoring queue. Optionally, jobs that had an error status will be killed with the `qdel` command, or else they will remain in `qstat` indefinitely.

    This function allows your program to wait for jobs to finish running before continuing.

    Parameters
    ----------
    jobs: list
        a list of `Job` objects
    kill_err: bool
        `True` or `False`, whether or not jobs left in error state should be automatically killed. It's recommended to leave this `True`
    print_verbose: bool
        whether or not descriptions of the steps being taken should be printed to the console with Python's `print` function

    Returns
    -------
    tuple
        a tuple of lists containing `Job` objects, in the format: `(completed_jobs, err_jobs)`. If ``jobs`` is empty, ``None``, or not a list, an error is logged and an empty tuple ``()`` is returned instead.

    Notes
    -----
    This function will only check whether a job is present/absent in the `qstat` queue, or in an error state in the `qstat` queue; it does not actually check if a job is in a 'Running' state.

    If a job is present and not in error state, it is assumed to either be 'qw' (waiting to run), or 'r' (running). In both cases, it is assumed that the job will eventually finish and leave the `qstat` queue, and subsequently be removed from this function's monitoring queue.

    Jobs in 'Eqw' error state are stuck and will not leave on their own so must be removed automatically by this function, or killed manually by the end user.

    Each job is classified exactly once per pass: a job that is no longer present is recorded as completed and its error state is not checked; a job that is present and in error state is recorded as an error job.

    The ``jobs`` is mutable and passed by reference; this means that upon completion of this function, the original ``jobs`` list will be depleted::

        >>> import qsub
        >>> jobs = []
        >>> len(jobs)
        0
        >>> for i in range(5):
        ...     job = qsub.submit('sleep 20')
        ...     jobs.append(job)
        ...
        >>> len(jobs)
        5
        >>> qsub.monitor_jobs(jobs = jobs)
        ([Job(id = 4098911, name = python, log_dir = None), Job(id = 4098913, name = python, log_dir = None), Job(id = 4098915, name = python, log_dir = None), Job(id = 4098912, name = python, log_dir = None), Job(id = 4098914, name = python, log_dir = None)], [])
        >>> len(jobs)
        0

    Examples
    --------
    Example usage::

        job = submit(print_verbose = True)
        completed_jobs, err_jobs = monitor_jobs([job], print_verbose = True)
        [job.validate_completion() for job in completed_jobs]

    """
    def _announce(message):
        # debug-log a progress message and optionally echo it to the console
        logger.debug(message)
        if print_verbose: print(message)

    # make sure jobs were passed
    if not jobs:
        logger.error('No jobs to monitor')
        return()
    # make sure jobs is a list
    if not isinstance(jobs, list):
        logger.error('"jobs" passed is not a list')
        return()

    completed_jobs = []
    # jobs in error state; won't finish
    err_jobs = []
    num_jobs = len(jobs)
    _announce('Monitoring jobs for completion. Number of jobs in queue: {0}'.format(num_jobs))
    while jobs:
        # Sort every job into exactly one bucket. The queue is rebuilt rather
        # than popped from while iterating, which would skip elements and
        # could remove the wrong job.
        still_waiting = []
        for job in jobs:
            if not job.present():
                completed_jobs.append(job)
            elif job.error():
                err_jobs.append(job)
            else:
                still_waiting.append(job)
        # slice assignment mutates the caller's list in place, as documented
        jobs[:] = still_waiting

        # report the queue size whenever it changes
        if num_jobs != len(jobs):
            num_jobs = len(jobs)
            _announce("Number of jobs in queue: {0}".format(num_jobs))

        # only wait when there is still something left to poll
        if jobs:
            sleep(5)
    _announce('No jobs remaining in the job queue')

    # check if there were any jobs left in error state
    if err_jobs:
        err_message = '{0} jobs left were left in error state. Jobs: {1}'.format(len(err_jobs), [job.id for job in err_jobs])
        logger.error(err_message)
        if print_verbose: print(err_message)
        # kill the error jobs with the 'qdel' command
        if kill_err:
            _announce('Killing jobs left in error state')
            # str() so that non-string job ids do not make str.join raise TypeError
            qdel_command = 'qdel {0}'.format(' '.join(str(job.id) for job in err_jobs))
            cmd = tools.SubprocessCmd(command = qdel_command).run()
            logger.debug(cmd.proc_stdout)
            if print_verbose: print(cmd.proc_stdout)
    return((completed_jobs, err_jobs))
Пример #7
0
def table_annovar(avinput_file, **kwargs):
    """
    Runs ANNOVAR ``table_annovar.pl``

    Parameters
    ----------
    avinput_file: str
        path to ANNOVAR format ``.avinput`` file

    Keyword Arguments
    -----------------
    output_file_base: str
        file path base for the annotated output file; defaults to ``avinput_file`` without its extension. The suffix ``.<buildver>_multianno.txt`` is expected to be appended by ANNOVAR
    bin_dir: str
        path to the ANNOVAR installation directory; defaults to ``configs['ANNOVAR_bin_dir']``
    db_dir: str
        path to the ANNOVAR database directory; defaults to ``configs['ANNOVAR_db_dir']``
    buildver: str
        the build version to use, e.g. "hg19"; defaults to ``configs['ANNOVAR_buildver']``
    protocol: str
        value passed to ``--protocol``, e.g. "cytoBand,refGene"; defaults to ``configs['ANNOVAR_protocol']``
    operation: str
        value passed to ``--operation``, e.g. "r,g"; defaults to ``configs['ANNOVAR_operation']``

    Notes
    -----
    Generates and executes a shell command in the format::

        "/annovar/table_annovar.pl" "example-data/Sample1.avinput" "/annovar/db" --outfile "example-data/Sample1" --buildver "hg19" --protocol "cytoBand,refGene" --operation "r,g" --nastring "." --remove

    The input file, the ``table_annovar.pl`` script, and (after the command has run) the expected output file are each passed to ``tools.missing_item_kill``.

    Returns
    -------
    str
        the path to the expected annotated output file, ``<output_file_base>.<buildver>_multianno.txt``
    """
    # get keyword arguments; each one is looked up under its own name
    bin_dir = kwargs.pop('bin_dir', configs['ANNOVAR_bin_dir'])
    db_dir = kwargs.pop('db_dir', configs['ANNOVAR_db_dir'])
    buildver = kwargs.pop('buildver', configs['ANNOVAR_buildver'])
    protocol = kwargs.pop('protocol', configs['ANNOVAR_protocol'])
    operation = kwargs.pop('operation', configs['ANNOVAR_operation'])
    output_file_base = kwargs.pop('output_file_base', os.path.splitext(avinput_file)[0])

    # make sure input file exists
    tools.missing_item_kill(item = avinput_file, logger = logger)

    # expected output file
    output_suffix = '.{0}_multianno.txt'.format(buildver)
    multianno_output = output_file_base + output_suffix

    # path to binary to use; checked up front, as in vcf2annovar
    table_annovar_bin = os.path.join(bin_dir, 'table_annovar.pl')
    tools.missing_item_kill(item = table_annovar_bin, logger = logger)

    # shell command to run
    table_annovar_command = '''
"{0}" "{1}" "{2}" --outfile "{3}" --buildver "{4}" --protocol "{5}" --operation "{6}" --nastring "." --remove
    '''.format(
    table_annovar_bin, # 0
    avinput_file, # 1
    db_dir, # 2
    output_file_base, # 3
    buildver, # 4
    protocol, # 5
    operation # 6
    )

    # run
    logger.debug(table_annovar_command)
    run_cmd = tools.SubprocessCmd(command = table_annovar_command).run()
    logger.debug(run_cmd.proc_stdout)
    logger.debug(run_cmd.proc_stderr)

    # make sure output file exists
    tools.missing_item_kill(item = multianno_output, logger = logger)

    return(multianno_output)