Пример #1
0
def fastq_screen_tag(conf_file,
                     fastq_in,
                     out_dir,
                     aligner=None,
                     threads=1,
                     tempdir=None):
    """
    Run 'fastq_screen' and output tagged fastq file

    The screen is run with '--subset 0' and '--tag' inside a
    temporary working directory; on success the tagged FASTQ
    is moved into 'out_dir'. The working directory is always
    removed, whether or not the screen succeeded.

    Arguments:
      conf_file (str): path to the fastq_screen .conf file
      fastq_in (str): path to the FASTQ file to screen
      out_dir (str): path to the output directory to put
        the tagged FASTQ in (not created by this function)
      aligner (str): optional, name of the aligner to pass
        to fastq_screen (default: don't specify the aligner)
      threads (int): optional, the number of threads to
        use when running fastq_screen (default: 1)
      tempdir (str): optional, directory to create temporary
        working directories in when running fastq_screen

    Returns:
      String: path to the tagged output FASTQ file

    Raises:
      Exception: if fastq_screen exits with a non-zero code,
        or if the expected tagged FASTQ was not produced.
    """
    # Make a temporary working directory
    work_dir = tempfile.mkdtemp(suffix='.fastq_screen', dir=tempdir)
    try:
        # Build fastq_screen command
        fastq_screen_cmd = Command('fastq_screen', '--subset', 0,
                                   '--threads', threads,
                                   '--conf', conf_file,
                                   '--tag',
                                   '--outdir', work_dir)
        # Use the 'aligner' parameter (there is no 'args' object
        # in this function's scope)
        if aligner is not None:
            fastq_screen_cmd.add_args('--aligner', aligner)
        fastq_screen_cmd.add_args(fastq_in)
        print("Running %s" % fastq_screen_cmd)
        # Run the command
        exit_code = fastq_screen_cmd.run_subprocess(working_dir=work_dir)
        if exit_code != 0:
            raise Exception("Screening %s against %s failed "
                            "(exit code %d)" %
                            (fastq_in, conf_file, exit_code))
        # Handle the outputs
        tagged_fastq = os.path.basename(strip_ext(fastq_in, '.fastq')) \
                       + '.tagged.fastq'
        tagged_fastq_tmp = os.path.join(work_dir, tagged_fastq)
        if not os.path.exists(tagged_fastq_tmp):
            raise Exception("Failed to generate tagged fastq file %s" %
                            tagged_fastq)
        tagged_fastq_out = os.path.join(out_dir, tagged_fastq)
        # shutil.move (rather than os.rename) so the transfer also
        # works when the temporary directory and the output
        # directory are on different filesystems
        shutil.move(tagged_fastq_tmp, tagged_fastq_out)
    finally:
        # Clean up working directory, even if something went wrong
        shutil.rmtree(work_dir)
    # Return path to tagged file
    return tagged_fastq_out
Пример #2
0
class PipelineCommandWrapper(PipelineCommand):
    """
    Directly usable wrapper for building program command lines

    Derived from PipelineCommand, but intended to be
    instantiated as-is instead of being subclassed.

    The command can be supplied in full at construction
    time, for example to wrap 'ls':

    >>> ls_command = PipelineCommandWrapper("List directory",'ls',dirn)

    Alternatively the command line can be built up in
    stages via the 'add_args' method:

    >>> ls_command = PipelineCommandWrapper("List directory",'ls')
    >>> ls_command.add_args(dirn)
    """
    def __init__(self, name, *args):
        """
        Create a new PipelineCommandWrapper instance

        Arguments:
          name (str): arbitrary name for the command
            (stored as a string)
          args  (List): initial list of arguments making
            up the command; if none are given then no
            underlying command is created yet
        """
        PipelineCommand.__init__(self, *args)
        self._name = str(name)
        # The underlying Command only exists once there is at
        # least one argument to build it from
        self._cmd = Command(*args) if args else None

    def add_args(self, *args):
        """
        Append further arguments to the command line

        If no command has been started yet then the supplied
        arguments are used to create it.

        Arguments:
          args  (List): one or more arguments to append to
            the command
        """
        if self._cmd is not None:
            self._cmd.add_args(*args)
            return
        self._cmd = Command(*args)

    def init(self, *args):
        """
        Internal: placeholder 'init' which accepts any
        arguments and does nothing
        """
        pass

    def cmd(self):
        """
        Internal: implement the 'cmd' method

        Returns the underlying command, or None if no
        arguments have been supplied so far.
        """
        return self._cmd
Пример #3
0
def batch_fastqs(fastqs,batch_size,basename="batched",
                 out_dir=None):
    """
    Splits reads from one or more Fastqs into batches

    Concatenates input Fastq files (using 'cat', or 'zcat'
    if the first file name ends with '.gz') and pipes the
    result to the external 'split' utility, which writes
    the reads out as a series of smaller Fastqs. The
    pipeline is written to a 'batch.sh' script in the
    output directory and executed with /bin/bash.

    Output files are named '<basename>.B<NNN>.r<R>.fastq',
    where NNN is the zero-padded batch index and R is the
    read number obtained from the first input Fastq.

    Arguments:
      fastqs (list): list of paths to one or more Fastq
        files to take reads from
      batch_size (int): number of reads to allocate to
        each batch
      basename (str): optional basename to use for the
        output Fastq files (default: 'batched')
      out_dir (str): optional path to a directory where
        the batched Fastqs will be written (default:
        current working directory)

    Returns:
      List: paths of the batched Fastq files, in batch
        order

    Raises:
      ValueError: if no Fastqs are supplied, or if the
        batch size is not a positive number.
      Exception: if the batching script exits with a
        non-zero code.
    """
    # Validate inputs up front rather than failing later with
    # an IndexError or ZeroDivisionError
    if not fastqs:
        raise ValueError("No Fastq files supplied for batching")
    if batch_size < 1:
        raise ValueError("Batch size must be at least 1 (got %s)" %
                         batch_size)
    # Default to the current directory if none was specified
    if out_dir is None:
        out_dir = os.getcwd()

    # Determine number of batches (rounding up so that a final
    # partial batch is counted)
    nreads = get_read_count(fastqs)
    nbatches = nreads//batch_size
    if nbatches*batch_size < nreads:
        nbatches += 1
    print("Creating %d batches of %d reads" % (nbatches,
                                               batch_size))

    # Check if fastqs are compressed
    # NB only the first file name is examined
    gzipped = fastqs[0].endswith('.gz')
    if gzipped:
        batch_cmd = Command('zcat')
    else:
        batch_cmd = Command('cat')

    # Get the read number
    read_number = get_read_number(fastqs[0])
    suffix = ".r%s.fastq" % read_number

    # Build and run the batching command
    # Each Fastq record occupies 4 lines, hence the line count
    # passed to 'split' is four times the batch size
    batch_cmd.add_args(*fastqs)
    batch_cmd.add_args('|',
                       'split',
                       '-l',batch_size*4,
                       '-d',
                       '-a',3,
                       '--additional-suffix=%s' % suffix,
                       '-',
                       os.path.join(out_dir,"%s.B" % basename))
    batch_script = os.path.join(out_dir,"batch.sh")
    batch_cmd.make_wrapper_script("/bin/bash",
                                  batch_script)

    # Check for successful exit code
    retcode = Command("/bin/bash",
                      batch_script).run_subprocess(
                          working_dir=out_dir)
    if retcode != 0:
        raise Exception("Batching failed: exit code %s" % retcode)
    print("Batching completed")

    # Collect and return the batched Fastq names
    # (mirrors the 3-digit numeric suffixes requested from 'split')
    batched_fastqs = [os.path.join(out_dir,
                                   "%s.B%03d%s"
                                   % (basename,i,suffix))
                      for i in range(nbatches)]
    return batched_fastqs
Пример #4
0
    # Run the QC
    announce("Running QC")
    max_jobs = __settings.general.max_concurrent_jobs
    sched = SimpleScheduler(runner=qc_runner,
                            max_concurrent=max_jobs)
    sched.start()
    for sample in samples:
        print "Checking/setting up for sample '%s'" % sample.name
        for fq in sample.fastq:
            if sample.verify_qc(qc_dir,fq):
                print "-- %s: QC ok" % fq
            else:
                print "-- %s: setting up QC" % fq
                qc_cmd = Command('illumina_qc.sh',fq)
                if args.nthreads > 1:
                    qc_cmd.add_args('--threads',args.nthreads)
                qc_cmd.add_args('--subset',args.fastq_screen_subset,
                                '--qc_dir',qc_dir)
                job = sched.submit(qc_cmd,
                                   wd=project.dirn,
                                   name="%s.%s" % (qc_base,
                                                   os.path.basename(fq)),
                                   log_dir=log_dir)
                print "Job: %s" % job
    # Wait for the scheduler to run all jobs
    sched.wait()
    sched.stop()

    # Verify the QC
    announce("Verifying QC")
    qc_ok = True