示例#1
0
def is_paired(bam_file):
    """Report whether a BAM file contains paired reads.

    The first 300000 lines of ``samtools view -h`` output are piped through a
    second ``samtools view -S -f 1`` (keep reads with the paired flag set) and
    the function returns True when at least one such read comes out.

    ``head`` closes the pipe early, so the child shell is started with the
    default SIGPIPE disposition restored (trick from
    http://stackoverflow.com/a/12451083/252589) and an exit status of 141 is
    accepted alongside 0.

    bam_file -- BAM input; passed through ``objectstore.cl_input`` before use
                (presumably to turn remote references into something usable
                on the command line -- confirm against objectstore).

    Raises ValueError when the pipeline exits with any other status or writes
    unexpected text to stderr. A message starting with "gof3r" and ending
    with "broken pipe" is tolerated.
    """
    def _default_sigpipe():
        # Runs in the child just before exec: let SIGPIPE kill the process
        # instead of being ignored as it is inside the Python interpreter.
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)

    bam_file = objectstore.cl_input(bam_file)
    pipeline = ("set -o pipefail; "
                "samtools view -h {bam_file} | head -300000 | "
                "samtools view -S -f 1 /dev/stdin  | head -1 | wc -l")
    proc = subprocess.Popen(pipeline.format(bam_file=bam_file),
                            shell=True,
                            executable=do.find_bash(),
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            preexec_fn=_default_sigpipe)
    raw_out, raw_err = proc.communicate()
    out_text = raw_out.decode()
    err_text = raw_err.decode().strip()

    exit_ok = proc.returncode in (0, 141)
    benign_gof3r = err_text.startswith("gof3r") and err_text.endswith("broken pipe")
    if not (exit_ok and (err_text == "" or benign_gof3r)):
        raise ValueError("Failed to check paired status of BAM file: %s" %
                         str(err_text))
    # wc -l prints the number of paired reads that made it through (0 or 1).
    return int(out_text) > 0
示例#2
0
def _merge_and_bgzip(orig_files, out_file, base_file, ext=""):
    """Merge a group of gzipped input files into a final bgzipped output.

    Also handles providing unique names for each input file to avoid
    collisions on multi-region output. Handles renaming with awk magic from:
    https://www.biostars.org/p/68477/

    orig_files -- gzipped inputs, concatenated in the order given.
    out_file   -- final output path; must end in ".gz".
    base_file  -- path whose extension-stripped form names the generated
                  "<base>-merge.bash" helper script.
    ext        -- text appended to every rewritten read name.

    Removes orig_files after merging.
    """
    gz_suffix = ".gz"
    assert out_file.endswith(gz_suffix)
    # Strip only the trailing suffix: str.replace would also remove ".gz"
    # occurring earlier in the path and point bgzip at the wrong file.
    full_file = out_file[:-len(gz_suffix)]
    run_file = "%s-merge.bash" % utils.splitext_plus(base_file)[0]

    cmds = ["set -e\n"]
    for i, fname in enumerate(orig_files):
        # On every 4th line starting at the first, awk replaces the line with
        # "@<input index>_<awk running counter><ext>"; all other lines pass
        # through unchanged. Output is appended to the uncompressed merge file.
        cmd = ("""zcat %s | awk '{print (NR%%4 == 1) ? "@%s_" ++i "%s" : $0}' >> %s\n"""
               % (fname, i, ext, full_file))
        cmds.append(cmd)
    # bgzip compresses full_file in place, producing full_file + ".gz" == out_file.
    cmds.append("bgzip %s\n" % full_file)

    with open(run_file, "w") as out_handle:
        out_handle.write("".join(cmds))
    do.run([do.find_bash(), run_file], "Rename, merge and bgzip CRAM fastq output")
    assert os.path.exists(out_file) and not _is_gzip_empty(out_file)
    # Inputs are only deleted once the merged output has been verified.
    for fname in orig_files:
        os.remove(fname)
示例#3
0
def is_paired(bam_file):
    """Determine if a BAM file has paired reads.

    Pipes the first 50000 lines of ``sambamba view -h`` output through a
    second ``sambamba view -S -F paired`` and returns True when at least one
    read survives the filter. Anything the pipeline writes to stderr is
    discarded.

    bam_file -- BAM input; passed through ``objectstore.cl_input`` before
                being placed on the command line.

    Raises subprocess.CalledProcessError if the shell pipeline exits non-zero.
    """
    bam_file = objectstore.cl_input(bam_file)
    cmd = ("sambamba view -h {bam_file} | head -50000 | "
           "sambamba view -S -F paired /dev/stdin  | head -1 | wc -l")
    # Open the null device in a context manager so the handle is closed once
    # the command finishes instead of being leaked.
    with open("/dev/null", "w") as null:
        out = subprocess.check_output(
            cmd.format(bam_file=bam_file), shell=True, executable=do.find_bash(), stderr=null
        )
    # wc -l reports how many paired reads were seen (0 or 1).
    return int(out) > 0
示例#4
0
def is_paired(bam_file):
    """Determine if a BAM file has paired reads.

    Pipes the first 50000 lines of ``sambamba view -h`` output through a
    second ``sambamba view -S -F paired`` and returns True when at least one
    read survives the filter. Anything the pipeline writes to stderr is
    discarded.

    bam_file -- BAM input; passed through ``objectstore.cl_input`` before
                being placed on the command line.

    Raises subprocess.CalledProcessError if the shell pipeline exits non-zero.
    """
    bam_file = objectstore.cl_input(bam_file)
    cmd = ("sambamba view -h {bam_file} | head -50000 | "
           "sambamba view -S -F paired /dev/stdin  | head -1 | wc -l")
    # Use a context manager so the /dev/null handle is closed after the
    # command completes rather than leaked.
    with open("/dev/null", "w") as null:
        out = subprocess.check_output(cmd.format(bam_file=bam_file), shell=True,
                                      executable=do.find_bash(),
                                      stderr=null)
    # wc -l reports how many paired reads were seen (0 or 1).
    return int(out) > 0
示例#5
0
def is_empty(bam_file):
    """Determine if a BAM file is empty.

    Runs ``samtools view`` (no header) piped into ``head -1 | wc -l`` and
    returns True when no alignment line is produced.

    ``head`` closes the pipe early, so the child shell is started with the
    default SIGPIPE disposition and an exit status of 141 is accepted
    alongside 0.

    bam_file -- BAM input; passed through ``objectstore.cl_input`` before
                being placed on the command line.

    Raises ValueError when the pipeline exits with another status or writes
    unexpected text to stderr. A message starting with "gof3r" and ending
    with "broken pipe" is tolerated.
    """
    bam_file = objectstore.cl_input(bam_file)
    cmd = ("set -o pipefail; "
           "samtools view {bam_file} | head -1 | wc -l")
    p = subprocess.Popen(cmd.format(bam_file=bam_file), shell=True,
                         executable=do.find_bash(),
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         preexec_fn=lambda: signal.signal(signal.SIGPIPE, signal.SIG_DFL))
    stdout, stderr = p.communicate()
    # communicate() hands back bytes on Python 3; decode so the comparisons
    # against str literals below behave the same on Python 2 and 3.
    stdout = stdout.decode()
    stderr = stderr.decode().strip()
    if ((p.returncode == 0 or p.returncode == 141) and
         (stderr == "" or (stderr.startswith("gof3r") and stderr.endswith("broken pipe")))):
        return int(stdout) == 0
    else:
        raise ValueError("Failed to check empty status of BAM file: %s" % str(stderr))
示例#6
0
def is_empty(bam_file):
    """Determine if a BAM file is empty.

    Runs ``<sambamba> view`` (no header) piped into ``head -1 | wc -l`` and
    returns True when no alignment line is produced. The sambamba executable
    is looked up with ``config_utils.get_program("sambamba", {})``.

    ``head`` closes the pipe early, so the child shell is started with the
    default SIGPIPE disposition and an exit status of 141 is accepted
    alongside 0.

    bam_file -- BAM input; passed through ``objectstore.cl_input`` before
                being placed on the command line.

    Raises ValueError when the pipeline exits with another status or writes
    unexpected text to stderr. A message starting with "gof3r" and ending
    with "broken pipe" is tolerated.
    """
    bam_file = objectstore.cl_input(bam_file)
    sambamba = config_utils.get_program("sambamba", {})
    cmd = ("set -o pipefail; "
           "{sambamba} view {bam_file} | head -1 | wc -l")
    p = subprocess.Popen(cmd.format(sambamba=sambamba, bam_file=bam_file), shell=True,
                         executable=do.find_bash(),
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         preexec_fn=lambda: signal.signal(signal.SIGPIPE, signal.SIG_DFL))
    stdout, stderr = p.communicate()
    # communicate() hands back bytes on Python 3; decode so the comparisons
    # against str literals below behave the same on Python 2 and 3.
    stdout = stdout.decode()
    stderr = stderr.decode().strip()
    if ((p.returncode == 0 or p.returncode == 141) and
         (stderr == "" or (stderr.startswith("gof3r") and stderr.endswith("broken pipe")))):
        return int(stdout) == 0
    else:
        raise ValueError("Failed to check empty status of BAM file: %s" % str(stderr))
示例#7
0
def is_paired(bam_file):
    """Determine if a BAM file has paired reads.

    Pipes the first 50000 lines of ``sambamba view -h`` output through a
    second ``sambamba view -S -F paired`` and returns True when at least one
    read survives the filter.

    Works around issues with head closing the sambamba pipe using signal trick from:
    http://stackoverflow.com/a/12451083/252589
    The child shell runs with the default SIGPIPE disposition, and an exit
    status of 141 is accepted alongside 0.

    bam_file -- BAM input; passed through ``objectstore.cl_input`` before
                being placed on the command line.

    Raises ValueError when the pipeline exits with another status or writes
    anything to stderr.
    """
    bam_file = objectstore.cl_input(bam_file)
    cmd = ("set -o pipefail; "
           "sambamba view -h {bam_file} | head -50000 | "
           "sambamba view -S -F paired /dev/stdin  | head -1 | wc -l")
    p = subprocess.Popen(cmd.format(bam_file=bam_file), shell=True,
                         executable=do.find_bash(),
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         preexec_fn=lambda: signal.signal(signal.SIGPIPE, signal.SIG_DFL))
    stdout, stderr = p.communicate()
    # communicate() hands back bytes on Python 3; decode so the comparison
    # with "" below can actually succeed.
    stdout = stdout.decode()
    stderr = stderr.decode().strip()
    # Parenthesised explicitly: `and` binds tighter than `or`, so without the
    # grouping an exit status of 0 would skip the stderr check entirely.
    if (p.returncode == 0 or p.returncode == 141) and stderr == "":
        return int(stdout) > 0
    else:
        raise ValueError("Failed to check paired status of BAM file: %s" % str(stderr))