def is_paired(bam_file):
    """Report whether a BAM file contains paired reads.

    Streams the header plus the first reads through ``samtools``, keeps only
    records with the paired flag set (``-f 1``) and counts whether at least
    one survives.

    ``head`` closes the pipe early, so the child shell is started with the
    default SIGPIPE handler restored (trick from
    http://stackoverflow.com/a/12451083/252589). Exit code 141 (128 + SIGPIPE)
    is therefore treated as success.

    Returns:
        True if at least one paired read was found in the sampled records.

    Raises:
        ValueError: if the pipeline exits with an unexpected code or writes
            anything to stderr other than a gof3r "broken pipe" message.
    """
    def _restore_sigpipe():
        # Python ignores SIGPIPE by default; the shell pipeline needs it back.
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)

    bam_file = objectstore.cl_input(bam_file)
    template = ("set -o pipefail; "
                "samtools view -h {bam_file} | head -300000 | "
                "samtools view -S -f 1 /dev/stdin | head -1 | wc -l")
    shell_cmd = template.format(bam_file=bam_file)
    proc = subprocess.Popen(shell_cmd,
                            shell=True,
                            executable=do.find_bash(),
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            preexec_fn=_restore_sigpipe)
    raw_out, raw_err = proc.communicate()
    out_text = raw_out.decode()
    err_text = raw_err.decode().strip()

    exit_ok = proc.returncode in (0, 141)
    # gof3r (S3 streaming) complains about the broken pipe when head exits early.
    gof3r_broken_pipe = (err_text.startswith("gof3r")
                         and err_text.endswith("broken pipe"))
    stderr_ok = err_text == "" or gof3r_broken_pipe
    if not (exit_ok and stderr_ok):
        raise ValueError("Failed to check paired status of BAM file: %s" % str(err_text))
    return int(out_text) > 0
def _merge_and_bgzip(orig_files, out_file, base_file, ext=""):
    """Merge a group of gzipped input files into a final bgzipped output.

    Also handles providing unique names for each input file to avoid
    collisions on multi-region output. Handles renaming with awk magic from:
    https://www.biostars.org/p/68477/

    A bash script is written next to ``base_file`` and executed; it appends
    each renamed input to the uncompressed target and then runs ``bgzip``
    on it. Removes orig_files after merging.

    Args:
        orig_files: gzipped fastq files to merge, in output order.
        out_file: final output path; must end with ".gz".
        base_file: path whose extension-stripped name is used for the
            generated ``-merge.bash`` script.
        ext: suffix appended to every renamed read identifier.
    """
    gz_suffix = ".gz"
    assert out_file.endswith(gz_suffix)
    # Strip only the trailing suffix: str.replace would also remove any ".gz"
    # appearing earlier in the path (e.g. a "run.gz.d/" directory), making the
    # merge target diverge from the out_file that bgzip is expected to create.
    full_file = out_file[:-len(gz_suffix)]
    run_file = "%s-merge.bash" % utils.splitext_plus(base_file)[0]
    cmds = ["set -e\n"]
    for i, fname in enumerate(orig_files):
        # Every 4th line starting at the first is a fastq header: replace it
        # with "@<file index>_<running counter><ext>" so names stay unique.
        cmd = ("""zcat %s | awk '{print (NR%%4 == 1) ? "@%s_" ++i "%s" : $0}' >> %s\n"""
               % (fname, i, ext, full_file))
        cmds.append(cmd)
    cmds.append("bgzip %s\n" % full_file)
    with open(run_file, "w") as out_handle:
        out_handle.write("".join(cmds))
    do.run([do.find_bash(), run_file], "Rename, merge and bgzip CRAM fastq output")
    assert os.path.exists(out_file) and not _is_gzip_empty(out_file)
    # Inputs are only removed once the merged output has been verified above.
    for fname in orig_files:
        os.remove(fname)
def is_paired(bam_file):
    """Determine if a BAM file has paired reads.

    Pipes the header and first records through ``sambamba``, filters on the
    ``paired`` flag and checks whether at least one record remains.

    Returns:
        True if a paired read was found in the sampled records.

    Raises:
        subprocess.CalledProcessError: if the shell pipeline exits non-zero.
    """
    bam_file = objectstore.cl_input(bam_file)
    cmd = ("sambamba view -h {bam_file} | head -50000 | "
           "sambamba view -S -F paired /dev/stdin | head -1 | wc -l")
    # Open the stderr sink in a context manager so the handle is closed after
    # the call instead of leaking one descriptor per invocation.
    with open("/dev/null", "w") as devnull:
        out = subprocess.check_output(cmd.format(bam_file=bam_file),
                                      shell=True,
                                      executable=do.find_bash(),
                                      stderr=devnull)
    return int(out) > 0
def is_paired(bam_file):
    """Determine if a BAM file has paired reads.

    Runs a ``sambamba`` pipeline over the header and first records, keeping
    only reads matching the ``paired`` filter, and reports whether any
    survived. Anything the pipeline writes to stderr is discarded.

    Returns:
        True if a paired read was found in the sampled records.

    Raises:
        subprocess.CalledProcessError: if the shell pipeline exits non-zero.
    """
    bam_file = objectstore.cl_input(bam_file)
    cmd = ("sambamba view -h {bam_file} | head -50000 | "
           "sambamba view -S -F paired /dev/stdin | head -1 | wc -l")
    # The original passed a bare open() result as stderr and never closed it;
    # the with-block guarantees the descriptor is released.
    with open("/dev/null", "w") as devnull:
        out = subprocess.check_output(cmd.format(bam_file=bam_file),
                                      shell=True,
                                      executable=do.find_bash(),
                                      stderr=devnull)
    return int(out) > 0
def is_empty(bam_file):
    """Determine if a BAM file is empty.

    Asks ``samtools view`` for the first alignment record and counts the
    resulting lines. The child shell runs with the default SIGPIPE handler
    restored because ``head`` closes the pipe early; exit code 141
    (128 + SIGPIPE) is therefore accepted alongside 0.

    Returns:
        True if no alignment records were produced.

    Raises:
        ValueError: if the pipeline exits with an unexpected code or writes
            anything to stderr other than a gof3r "broken pipe" message.
    """
    bam_file = objectstore.cl_input(bam_file)
    cmd = ("set -o pipefail; "
           "samtools view {bam_file} | head -1 | wc -l")
    p = subprocess.Popen(cmd.format(bam_file=bam_file),
                         shell=True,
                         executable=do.find_bash(),
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         preexec_fn=lambda: signal.signal(signal.SIGPIPE, signal.SIG_DFL))
    stdout, stderr = p.communicate()
    # communicate() returns bytes on Python 3; without decoding, the string
    # comparisons below can never match and startswith("gof3r") raises.
    stdout = stdout.decode()
    stderr = stderr.decode().strip()
    stderr_ok = (stderr == "" or
                 (stderr.startswith("gof3r") and stderr.endswith("broken pipe")))
    if p.returncode in (0, 141) and stderr_ok:
        return int(stdout) == 0
    else:
        raise ValueError("Failed to check empty status of BAM file: %s" % str(stderr))
def is_empty(bam_file):
    """Determine if a BAM file is empty.

    Uses the configured ``sambamba`` executable to emit the first alignment
    record and counts the resulting lines. The child shell runs with the
    default SIGPIPE handler restored because ``head`` closes the pipe early;
    exit code 141 (128 + SIGPIPE) is therefore accepted alongside 0.

    Returns:
        True if no alignment records were produced.

    Raises:
        ValueError: if the pipeline exits with an unexpected code or writes
            anything to stderr other than a gof3r "broken pipe" message.
    """
    bam_file = objectstore.cl_input(bam_file)
    sambamba = config_utils.get_program("sambamba", {})
    cmd = ("set -o pipefail; "
           "{sambamba} view {bam_file} | head -1 | wc -l")
    p = subprocess.Popen(cmd.format(sambamba=sambamba, bam_file=bam_file),
                         shell=True,
                         executable=do.find_bash(),
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         preexec_fn=lambda: signal.signal(signal.SIGPIPE, signal.SIG_DFL))
    stdout, stderr = p.communicate()
    # communicate() returns bytes on Python 3; decode before the str
    # comparisons, otherwise the success branch is unreachable and
    # startswith("gof3r") raises TypeError.
    stdout = stdout.decode()
    stderr = stderr.decode().strip()
    stderr_ok = (stderr == "" or
                 (stderr.startswith("gof3r") and stderr.endswith("broken pipe")))
    if p.returncode in (0, 141) and stderr_ok:
        return int(stdout) == 0
    else:
        raise ValueError("Failed to check empty status of BAM file: %s" % str(stderr))
def is_paired(bam_file):
    """Determine if a BAM file has paired reads.

    Works around issues with head closing the samtools pipe using signal
    trick from: http://stackoverflow.com/a/12451083/252589

    The header and first records are piped through ``sambamba``, filtered on
    the ``paired`` flag, and the surviving lines counted. Exit code 141
    (128 + SIGPIPE) is accepted alongside 0 because ``head`` terminates the
    upstream commands early.

    Returns:
        True if a paired read was found in the sampled records.

    Raises:
        ValueError: if the pipeline exits with an unexpected code or writes
            anything to stderr other than a gof3r "broken pipe" message.
    """
    bam_file = objectstore.cl_input(bam_file)
    cmd = ("set -o pipefail; "
           "sambamba view -h {bam_file} | head -50000 | "
           "sambamba view -S -F paired /dev/stdin | head -1 | wc -l")
    p = subprocess.Popen(cmd.format(bam_file=bam_file),
                         shell=True,
                         executable=do.find_bash(),
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         preexec_fn=lambda: signal.signal(signal.SIGPIPE, signal.SIG_DFL))
    stdout, stderr = p.communicate()
    # Decode so the comparisons below work on Python 3, where communicate()
    # returns bytes.
    stdout = stdout.decode()
    stderr = stderr.decode().strip()
    # The original `rc == 0 or rc == 141 and stderr == ""` parsed as
    # `rc == 0 or (rc == 141 and ...)`, so stderr was ignored on exit code 0.
    # Both exit codes now require clean stderr, tolerating only the gof3r
    # broken-pipe notice that early pipe closure produces.
    stderr_ok = (stderr == "" or
                 (stderr.startswith("gof3r") and stderr.endswith("broken pipe")))
    if p.returncode in (0, 141) and stderr_ok:
        return int(stdout) > 0
    else:
        raise ValueError("Failed to check paired status of BAM file: %s" % str(stderr))