def run_bowtie2(paired_end_mapping,
                genome,
                output_path,
                disable_parallel=False):
    """Align paired-end reads with bowtie2, piping output through samtools to .bam.

    :param paired_end_mapping: dict mapping output_prefix -> sequence of two
        input filenames (index 0 is passed as -1, index 1 as -2).
    :param genome: key into config['bowtie2_genomes'] giving the index path.
    :param output_path: directory receiving <prefix>.bt2.bam and <prefix>.bt2.log.
    :param disable_parallel: when True, run jobs without the inter-job spawn delay.
    :raises ValueError: if an input filename contains a space or semicolon
        (these names are interpolated into a shell command below).
    """
    bowtie2_logger = _logshim.getLogger('run_bowtie2')

    # Import the config file to get genome locations
    config = _script_helpers.get_config()

    if disable_parallel:
        shell_job_runner = _script_helpers.ShellJobRunner(bowtie2_logger)
    else:
        # Stagger job launches so concurrent genome mmaps don't thrash I/O.
        shell_job_runner = _script_helpers.ShellJobRunner(bowtie2_logger,
                                                          delay_seconds=60)

    # .items() instead of the Python-2-only .iteritems(): works on 2 and 3.
    for output_prefix, paired_ends in paired_end_mapping.items():
        bowtie2_logger.info('Spawning niced process for bowtie2 on: %s' %
                            (output_prefix))
        for filename in paired_ends:
            # Explicit raise (not assert, which is stripped under -O):
            # filenames are substituted into a shell pipeline string.
            if " " in filename or ";" in filename:
                raise ValueError('Unsafe character in input filename: %s' % filename)
            bowtie2_logger.debug('    Input: %s' % (filename))

        # bowtie2 options:
        # --end-to-end: this is the default, but let's explicitly specify it
        # --sensitive: again, the default (consider switching to --fast?)
        # --no-unal: Suppress unaligned reads from the output .sam
        # --no-discordant: These are paired-end reads. We expect them to be non-discordant.
        # --mm: mmap MAP_SHARED (other processes can use our genome, cool!)
        # --met-stderr: Write metrics to stderr
        # --time: output the time things took
        # -x: target genome
        command = "bowtie2 --end-to-end --sensitive --no-unal --no-discordant --mm --met-stderr --time -x %s -1 %s -2 %s 2>%s | samtools view -bS - >%s"

        shell_job_runner.run(
            command %
            (config['bowtie2_genomes'][genome], paired_ends[0], paired_ends[1],
             output_path + "/" + output_prefix + ".bt2.log",
             output_path + "/" + output_prefix + ".bt2.bam"))

    shell_job_runner.finish()
def run_bowtie2(paired_end_mapping, genome, output_path, disable_parallel=False):
    """Align paired-end reads with bowtie2, piping output through samtools to .bam.

    :param paired_end_mapping: dict mapping output_prefix -> sequence of two
        input filenames (index 0 is passed as -1, index 1 as -2).
    :param genome: key into config['bowtie2_genomes'] giving the index path.
    :param output_path: directory receiving <prefix>.bt2.bam and <prefix>.bt2.log.
    :param disable_parallel: when True, run jobs without the inter-job spawn delay.
    :raises ValueError: if an input filename contains a space or semicolon
        (these names are interpolated into a shell command below).
    """
    bowtie2_logger = _logshim.getLogger('run_bowtie2')

    # Import the config file to get genome locations
    config = _script_helpers.get_config()

    if disable_parallel:
        shell_job_runner = _script_helpers.ShellJobRunner(bowtie2_logger)
    else:
        # Stagger job launches so concurrent genome mmaps don't thrash I/O.
        shell_job_runner = _script_helpers.ShellJobRunner(bowtie2_logger, delay_seconds=60)

    # .items() instead of the Python-2-only .iteritems(): works on 2 and 3.
    for output_prefix, paired_ends in paired_end_mapping.items():
        bowtie2_logger.info('Spawning niced process for bowtie2 on: %s' % (output_prefix))
        for filename in paired_ends:
            # Explicit raise (not assert, which is stripped under -O):
            # filenames are substituted into a shell pipeline string.
            if " " in filename or ";" in filename:
                raise ValueError('Unsafe character in input filename: %s' % filename)
            bowtie2_logger.debug('    Input: %s' % (filename))

        # bowtie2 options:
        # --end-to-end: this is the default, but let's explicitly specify it
        # --sensitive: again, the default (consider switching to --fast?)
        # --no-unal: Suppress unaligned reads from the output .sam
        # --no-discordant: These are paired-end reads. We expect them to be non-discordant.
        # --mm: mmap MAP_SHARED (other processes can use our genome, cool!)
        # --met-stderr: Write metrics to stderr
        # --time: output the time things took
        # -x: target genome
        command = "bowtie2 --end-to-end --sensitive --no-unal --no-discordant --mm --met-stderr --time -x %s -1 %s -2 %s 2>%s | samtools view -bS - >%s"

        shell_job_runner.run(command % (config['bowtie2_genomes'][genome],
                                        paired_ends[0],
                                        paired_ends[1],
                                        output_path + "/" + output_prefix + ".bt2.log",
                                        output_path + "/" + output_prefix + ".bt2.bam"))

    shell_job_runner.finish()
# Example #3
__copyright__ = 'Gordon Lab at Washington University in St. Louis'
__license__ = 'MIT'
__version__ = '1.0.3'

import _logshim
import _script_helpers
import argparse
import glob
import os
import tempfile

# A parameter needed by samtools to sort in-memory (passed as its max-memory
# setting; "50G" assumes a large-memory host — TODO confirm for this cluster).
MAX_MEM = "50G"

# Load our config files once at import time (genome locations, etc.).
CONFIG = _script_helpers.get_config()


def large_filter_fixmate_and_sort(input_files, genome, output_path, disable_parallel=False):
    primary_logger = _logshim.getLogger('first_pass')

    output_suffix = ".tmp"

    if disable_parallel:  # Doesn't change parallelism in last samtools sort
        shell_job_runner = _script_helpers.ShellJobRunner(primary_logger)
    else:
        shell_job_runner = _script_helpers.ShellJobRunner(primary_logger, delay_seconds=60)

    # We do a few things here:
    #  - View only mapping quality >= 10
    #  - Remove chrM