def _run_on_chrom(chrom, work_bams, names, work_dir, items):
    """Run cn.mops on work BAMs for a specific chromosome.

    An R script is written next to the expected BED output and executed with
    Rscript inside a file transaction. Nothing is run when the BED output
    already exists.

    chrom -- chromosome to process; a false value labels the output "all".
    work_bams -- BAM files to analyze; the first one names the output file.
    names -- passed through to ``_prep_load_script`` along with the BAMs.
    work_dir -- directory that receives the BED file and the R script.
    items -- sample data; the first entry supplies the configuration.

    Raises subprocess.CalledProcessError when Rscript fails with a message
    that ``_allowed_cnmops_errorstates`` does not accept.
    """
    tooldir = install.get_defaults().get("tooldir", "/usr/local")
    local_sitelib = os.path.join(tooldir, "lib", "R", "site-library")
    batch = sshared.get_cur_batch(items)
    ext = "-%s-cnv" % batch if batch else "-cnv"
    out_file = os.path.join(work_dir, "%s%s-%s.bed"
                            % (os.path.splitext(os.path.basename(work_bams[0]))[0], ext,
                               chrom if chrom else "all"))
    if not utils.file_exists(out_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            # The script lives beside the final output, not in the transaction directory.
            rcode = "%s-run.R" % os.path.splitext(out_file)[0]
            with open(rcode, "w") as out_handle:
                out_handle.write(_script.format(prep_str=_prep_load_script(work_bams, names, chrom, items),
                                                out_file=tx_out_file,
                                                local_sitelib=local_sitelib))
            rscript = config_utils.get_program("Rscript", items[0]["config"])
            try:
                do.run([rscript, rcode], "cn.mops CNV detection", items[0], log_error=False)
            # "except X as name" works on Python 2.6+ and Python 3; the older
            # "except X, name" spelling is a SyntaxError on Python 3.
            except subprocess.CalledProcessError as msg:
                # cn.mops errors out if no CNVs found. Just write an empty file.
                if _allowed_cnmops_errorstates(str(msg)):
                    with open(tx_out_file, "w") as out_handle:
                        out_handle.write('track name=empty description="No CNVs found"\n')
                else:
                    logger.exception()
                    raise
def _run_lumpy(full_bams, sr_bams, disc_bams, work_dir, items):
    """Call structural variants with lumpyexpress.

    The VCF path is derived from the first item's alignment BAM plus a
    batch-aware suffix. lumpyexpress is executed inside a file transaction
    only when that VCF is missing.

    Returns a tuple of the ``vcfutils.sort_by_ref`` result for the VCF and
    the exclusion BED from ``sshared.prepare_exclude_file``.
    """
    batch = sshared.get_cur_batch(items)
    if batch:
        ext = "-%s-svs" % batch
    else:
        ext = "-svs"
    bam_base = os.path.splitext(os.path.basename(items[0]["align_bam"]))[0]
    out_file = os.path.join(work_dir, "%s%s.vcf" % (bam_base, ext))
    sv_exclude_bed = sshared.prepare_exclude_file(items, out_file)
    if not utils.file_exists(out_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            with tx_tmpdir(items[0]) as tmpdir:
                full_csv = ",".join(full_bams)
                sr_csv = ",".join(sr_bams)
                disc_csv = ",".join(disc_bams)
                if utils.file_exists(sv_exclude_bed):
                    exclude_arg = "-x %s" % sv_exclude_bed
                else:
                    exclude_arg = ""
                # Reference lookup is retained; the command template has no slot for it.
                ref_file = dd.get_ref_file(items[0])
                # use our bcbio python for runs within lumpyexpress
                python_dir = os.path.dirname(sys.executable)
                template = (
                    "export PATH={curpython_dir}:$PATH && "
                    "lumpyexpress -v -B {full_bams} -S {sr_bams} -D {disc_bams} "
                    "{exclude} -T {tmpdir} -o {tx_out_file}")
                command = template.format(curpython_dir=python_dir,
                                          full_bams=full_csv,
                                          sr_bams=sr_csv,
                                          disc_bams=disc_csv,
                                          exclude=exclude_arg,
                                          tmpdir=tmpdir,
                                          tx_out_file=tx_out_file)
                do.run(command, "lumpyexpress", items[0])
    return vcfutils.sort_by_ref(out_file, items[0]), sv_exclude_bed
def _run_delly(bam_files, chrom, ref_file, work_dir, items):
    """Call structural variants with ``delly call`` for one chromosome.

    The BCF output is named after the first BAM, the batch suffix and the
    chromosome. delly runs only when neither the BCF nor its ``.vcf.gz``
    counterpart exists and the chromosome has variant regions.

    Returns a one-element list holding the ``_bgzip_and_clean`` result.
    Raises subprocess.CalledProcessError for delly failures other than
    "No structural variants found".
    """
    batch = sshared.get_cur_batch(items)
    ext = ("-%s-svs" % batch) if batch else "-svs"
    bam_base = os.path.splitext(os.path.basename(bam_files[0]))[0]
    out_file = os.path.join(work_dir, "%s%s-%s.bcf" % (bam_base, ext, chrom))
    final_file = "%s.vcf.gz" % (utils.splitext_plus(out_file)[0])
    configured_cores = utils.get_in(items[0], ("config", "algorithm", "num_cores"), 1)
    # Never ask for more threads than there are BAM files.
    cores = min(configured_cores, len(bam_files))
    have_output = utils.file_exists(out_file) or utils.file_exists(final_file)
    if not have_output:
        with file_transaction(items[0], out_file) as tx_out_file:
            if sshared.has_variant_regions(items, out_file, chrom):
                exclude_file = _delly_exclude_file(items, out_file, chrom)
                delly_args = ["delly", "call", "-g", ref_file, "-o", tx_out_file,
                              "-x", exclude_file] + bam_files
                env_prefix = "export OMP_NUM_THREADS=%s && export LC_ALL=C && " % cores
                try:
                    do.run(env_prefix + " ".join(delly_args), "delly structural variant")
                except subprocess.CalledProcessError as msg:
                    # delly returns an error exit code if there are no variants
                    found_nothing = "No structural variants found" in str(msg)
                    if not found_nothing:
                        raise
    return [_bgzip_and_clean(out_file, items)]
def _run_delly(bam_files, chrom, sv_type, ref_file, work_dir, items):
    """Run delly, calling structural variations for the specified type.

    The VCF output is named after the first BAM, the batch suffix, the
    chromosome and the SV type, and is written inside a file transaction
    when it does not exist yet. An empty VCF is written when the chromosome
    has no variant regions, when delly produces no file, or when delly
    fails with "No structural variants found".

    bam_files -- input BAMs; the first one names the output file.
    chrom -- chromosome used in the output name and the region checks.
    sv_type -- value passed to delly's ``-t`` option.
    ref_file -- reference passed to delly's ``-g`` option.
    work_dir -- directory that receives the VCF.
    items -- sample data; the first entry supplies the configuration.

    Raises subprocess.CalledProcessError for any other delly failure.
    """
    batch = sshared.get_cur_batch(items)
    ext = "-%s-svs" % batch if batch else "-svs"
    out_file = os.path.join(work_dir, "%s%s-%s-%s.vcf"
                            % (os.path.splitext(os.path.basename(bam_files[0]))[0], ext, chrom, sv_type))
    # Never ask for more threads than there are BAM files.
    cores = min(utils.get_in(items[0], ("config", "algorithm", "num_cores"), 1),
                len(bam_files))
    if not utils.file_exists(out_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            names = [tz.get_in(["rgnames", "sample"], x) for x in items]
            if not sshared.has_variant_regions(items, out_file, chrom):
                vcfutils.write_empty_vcf(tx_out_file, samples=names)
            else:
                exclude = ["-x", _delly_exclude_file(items, out_file, chrom)]
                # NOTE(review): a ["--map-qual", "1"] option list used to be built
                # here but was never added to the command. It is dropped as dead
                # code; confirm whether delly was meant to receive it.
                cmd = ["delly", "-t", sv_type, "-g", ref_file, "-o", tx_out_file] + exclude + bam_files
                multi_cmd = "export OMP_NUM_THREADS=%s && export LC_ALL=C && " % cores
                try:
                    do.run(multi_cmd + " ".join(cmd), "delly structural variant")
                    # Delly will write nothing if no variants found
                    if not utils.file_exists(tx_out_file):
                        vcfutils.write_empty_vcf(tx_out_file, samples=names)
                # "except X as name" works on Python 2.6+ and Python 3; the older
                # "except X, name" spelling is a SyntaxError on Python 3.
                except subprocess.CalledProcessError as msg:
                    # delly returns an error exit code if there are no variants
                    if "No structural variants found" in str(msg):
                        vcfutils.write_empty_vcf(tx_out_file, samples=names)
                    else:
                        raise
def _run_lumpy(full_bams, sr_bams, disc_bams, previous_evidence, work_dir, items):
    """Call structural variants with lumpyexpress, adding depth evidence.

    ``previous_evidence`` maps each sample to a mapping of evidence files;
    every file that exists is handed to lumpyexpress through ``-d`` as
    ``sample:file``. The VCF is produced inside a file transaction only when
    it is missing.

    Returns a tuple of the ``vcfutils.sort_by_ref`` result for the VCF and
    the exclusion BED from ``sshared.prepare_exclude_file``.
    """
    batch = sshared.get_cur_batch(items)
    ext = ("-%s-svs" % batch) if batch else "-svs"
    bam_base = os.path.splitext(os.path.basename(items[0]["align_bam"]))[0]
    out_file = os.path.join(work_dir, "%s%s.vcf" % (bam_base, ext))
    sv_exclude_bed = sshared.prepare_exclude_file(items, out_file)
    if not utils.file_exists(out_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            with tx_tmpdir(items[0]) as tmpdir:
                full_csv = ",".join(full_bams)
                sr_csv = ",".join(sr_bams)
                disc_csv = ",".join(disc_bams)
                exclude_arg = ""
                if sv_exclude_bed and utils.file_exists(sv_exclude_bed):
                    exclude_arg = "-x %s" % sv_exclude_bed
                # Reference lookup is retained; the command template has no slot for it.
                ref_file = dd.get_ref_file(items[0])
                depths = ["%s:%s" % (sample, ev_file)
                          for sample, ev_files in previous_evidence.items()
                          for ev_file in ev_files.values()
                          if utils.file_exists(ev_file)]
                depth_arg = ("-d %s" % ",".join(depths)) if depths else ""
                # use our bcbio python for runs within lumpyexpress
                path_exports = utils.local_path_export()
                template = ("{exports}lumpyexpress -v -B {full_bams} -S {sr_bams} -D {disc_bams} "
                            "{exclude} {depth_arg} -T {tmpdir} -o {tx_out_file}")
                command = template.format(exports=path_exports,
                                          full_bams=full_csv,
                                          sr_bams=sr_csv,
                                          disc_bams=disc_csv,
                                          exclude=exclude_arg,
                                          depth_arg=depth_arg,
                                          tmpdir=tmpdir,
                                          tx_out_file=tx_out_file)
                do.run(command, "lumpyexpress", items[0])
    return vcfutils.sort_by_ref(out_file, items[0]), sv_exclude_bed
def _run_lumpy(full_bams, sr_bams, disc_bams, previous_evidence, work_dir, items):
    """Run lumpyexpress over the supplied BAM sets, with optional depth files.

    Existing files found in ``previous_evidence`` (sample -> evidence
    mapping) are forwarded as ``sample:file`` pairs through ``-d``. When the
    target VCF is already present, lumpyexpress is not executed.

    Returns ``(sorted_vcf, sv_exclude_bed)``, where ``sorted_vcf`` comes from
    ``vcfutils.sort_by_ref``.
    """
    batch = sshared.get_cur_batch(items)
    if batch:
        ext = "-%s-svs" % batch
    else:
        ext = "-svs"
    align_bam_name = os.path.basename(items[0]["align_bam"])
    out_name = "%s%s.vcf" % (os.path.splitext(align_bam_name)[0], ext)
    out_file = os.path.join(work_dir, out_name)
    sv_exclude_bed = sshared.prepare_exclude_file(items, out_file)
    if utils.file_exists(out_file):
        # Output from an earlier run: only the sorting step is needed.
        return vcfutils.sort_by_ref(out_file, items[0]), sv_exclude_bed
    with file_transaction(items[0], out_file) as tx_out_file:
        with tx_tmpdir(items[0]) as tmpdir:
            joined_full = ",".join(full_bams)
            joined_sr = ",".join(sr_bams)
            joined_disc = ",".join(disc_bams)
            if sv_exclude_bed and utils.file_exists(sv_exclude_bed):
                exclude_opt = "-x %s" % sv_exclude_bed
            else:
                exclude_opt = ""
            # Reference lookup is retained; the command template has no slot for it.
            ref_file = dd.get_ref_file(items[0])
            depth_pairs = []
            for sample, ev_files in previous_evidence.items():
                for ev_file in ev_files.values():
                    if not utils.file_exists(ev_file):
                        continue
                    depth_pairs.append("%s:%s" % (sample, ev_file))
            if depth_pairs:
                depth_opt = "-d %s" % ",".join(depth_pairs)
            else:
                depth_opt = ""
            # use our bcbio python for runs within lumpyexpress
            exports = utils.local_path_export()
            cmd = (
                "{exports}lumpyexpress -v -B {full_bams} -S {sr_bams} -D {disc_bams} "
                "{exclude} {depth_arg} -T {tmpdir} -o {tx_out_file}")
            do.run(cmd.format(exports=exports, full_bams=joined_full, sr_bams=joined_sr,
                              disc_bams=joined_disc, exclude=exclude_opt,
                              depth_arg=depth_opt, tmpdir=tmpdir, tx_out_file=tx_out_file),
                   "lumpyexpress", items[0])
    return vcfutils.sort_by_ref(out_file, items[0]), sv_exclude_bed
def _run_on_chrom(chrom, work_bams, names, work_dir, items):
    """Run cn.mops for a single chromosome and report the BED output.

    An R script is generated beside the expected BED file and executed with
    ``Rscript --vanilla`` inside a file transaction, unless the BED file
    already exists. When Rscript fails with a message accepted by
    ``_allowed_cnmops_errorstates``, a one-line placeholder track is written
    instead; any other failure is logged and re-raised.

    Returns a one-element list containing the BED path.
    """
    local_sitelib = utils.R_sitelib()
    batch = sshared.get_cur_batch(items)
    ext = ("-%s-cnv" % batch) if batch else "-cnv"
    bam_base = os.path.splitext(os.path.basename(work_bams[0]))[0]
    region_label = chrom or "all"
    out_file = os.path.join(work_dir, "%s%s-%s.bed" % (bam_base, ext, region_label))
    if utils.file_exists(out_file):
        return [out_file]
    with file_transaction(items[0], out_file) as tx_out_file:
        # The script lives beside the final output, not in the transaction directory.
        rcode = "%s-run.R" % os.path.splitext(out_file)[0]
        with open(rcode, "w") as script_handle:
            script_text = _script.format(
                prep_str=_prep_load_script(work_bams, names, chrom, items),
                out_file=tx_out_file,
                local_sitelib=local_sitelib)
            script_handle.write(script_text)
        rscript = utils.Rscript_cmd()
        try:
            do.run([rscript, "--vanilla", rcode], "cn.mops CNV detection",
                   items[0], log_error=False)
        except subprocess.CalledProcessError as msg:
            if not _allowed_cnmops_errorstates(str(msg)):
                logger.exception()
                raise
            # cn.mops errors out if no CNVs found. Just write an empty file.
            with open(tx_out_file, "w") as empty_handle:
                empty_handle.write('track name=empty description="No CNVs found"\n')
    return [out_file]
def _run_delly(bam_files, chrom, sv_type, ref_file, work_dir, items):
    """Call one structural variant type with ``delly call`` on a chromosome.

    The BCF output is named after the first BAM, the batch suffix, the
    chromosome and the SV type. delly runs only when neither the BCF nor its
    ``.vcf.gz`` counterpart exists and the chromosome has variant regions.

    Returns a one-element list holding the ``_bgzip_and_clean`` result.
    Raises subprocess.CalledProcessError for delly failures other than
    "No structural variants found".
    """
    batch = sshared.get_cur_batch(items)
    ext = ("-%s-svs" % batch) if batch else "-svs"
    bam_base = os.path.splitext(os.path.basename(bam_files[0]))[0]
    out_file = os.path.join(work_dir, "%s%s-%s-%s.bcf" % (bam_base, ext, chrom, sv_type))
    final_file = "%s.vcf.gz" % (utils.splitext_plus(out_file)[0])
    configured_cores = utils.get_in(items[0], ("config", "algorithm", "num_cores"), 1)
    # Never ask for more threads than there are BAM files.
    cores = min(configured_cores, len(bam_files))
    have_output = utils.file_exists(out_file) or utils.file_exists(final_file)
    if not have_output:
        with file_transaction(items[0], out_file) as tx_out_file:
            if sshared.has_variant_regions(items, out_file, chrom):
                exclude_file = _delly_exclude_file(items, out_file, chrom)
                # uses -n to skip small indel detection for speed, not yet optimized:
                # https://github.com/dellytools/delly/issues/36
                delly_args = ["delly", "call", "-n", "-t", sv_type, "-g", ref_file,
                              "-o", tx_out_file, "-x", exclude_file] + bam_files
                env_prefix = "export OMP_NUM_THREADS=%s && export LC_ALL=C && " % cores
                try:
                    do.run(env_prefix + " ".join(delly_args), "delly structural variant")
                except subprocess.CalledProcessError as msg:
                    # delly returns an error exit code if there are no variants
                    found_nothing = "No structural variants found" in str(msg)
                    if not found_nothing:
                        raise
    return [_bgzip_and_clean(out_file, items)]
def _run_delly(bam_files, chrom, ref_file, work_dir, items):
    """Call structural variants with ``delly call`` for one chromosome.

    delly runs inside a file transaction when neither the BCF output nor its
    ``.vcf.gz`` counterpart exists and the chromosome has variant regions.
    Two delly failure messages are tolerated and leave no BCF behind: too
    little data to estimate library parameters, and no structural variants.

    Returns a one-element list holding the ``_bgzip_and_clean`` result.
    Raises subprocess.CalledProcessError for any other delly failure.
    """
    batch = sshared.get_cur_batch(items)
    if batch:
        ext = "-%s-svs" % batch
    else:
        ext = "-svs"
    first_bam_base = os.path.splitext(os.path.basename(bam_files[0]))[0]
    out_file = os.path.join(work_dir, "%s%s-%s.bcf" % (first_bam_base, ext, chrom))
    final_file = "%s.vcf.gz" % (utils.splitext_plus(out_file)[0])
    requested_cores = utils.get_in(items[0], ("config", "algorithm", "num_cores"), 1)
    cores = min(requested_cores, len(bam_files))
    if not (utils.file_exists(out_file) or utils.file_exists(final_file)):
        with file_transaction(items[0], out_file) as tx_out_file:
            if sshared.has_variant_regions(items, out_file, chrom):
                exclude_file = _delly_exclude_file(items, out_file, chrom)
                call_args = ["delly", "call", "-g", ref_file, "-o", tx_out_file,
                             "-x", exclude_file] + bam_files
                env_prefix = "export OMP_NUM_THREADS=%s && export LC_ALL=C && " % cores
                try:
                    do.run(env_prefix + " ".join(call_args), "delly structural variant")
                except subprocess.CalledProcessError as msg:
                    error_text = str(msg)
                    # Small input samples, write an empty vcf
                    too_little_data = ("Sample has not enough data to estimate library parameters"
                                       in error_text)
                    # delly returns an error exit code if there are no variants
                    found_nothing = "No structural variants found" in error_text
                    if not too_little_data and not found_nothing:
                        raise
    return [_bgzip_and_clean(out_file, items)]
def _run_smoove(full_bams, sr_bams, disc_bams, work_dir, items):
    """Call structural variants with ``smoove call`` (lumpy-sv).

    A ``-prep.vcf.gz`` file named after the first item's alignment BAM is
    returned as-is when it exists. Otherwise smoove runs inside a file
    transaction if the genotyped VCF is missing, and the VCF is then passed
    to ``vcfutils.bgzip_and_index``. When smoove fails with a message
    accepted by ``_allowed_errors``, an empty VCF is written instead; any
    other failure is logged and re-raised.

    Returns a tuple of the VCF path and the exclusion BED from
    ``sshared.prepare_exclude_file``.
    """
    batch = sshared.get_cur_batch(items)
    ext = ("-%s-svs" % batch) if batch else "-svs"
    name = "%s%s" % (dd.get_sample_name(items[0]), ext)
    out_file = os.path.join(work_dir, "%s-smoove.genotyped.vcf.gz" % name)
    sv_exclude_bed = sshared.prepare_exclude_file(items, out_file)
    align_base = os.path.splitext(os.path.basename(items[0]["align_bam"]))[0]
    old_out_file = os.path.join(work_dir, "%s%s-prep.vcf.gz" % (align_base, ext))
    if utils.file_exists(old_out_file):
        return old_out_file, sv_exclude_bed
    if not utils.file_exists(out_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            # The transaction directory is smoove's output dir, TMPDIR and cwd.
            tx_dir = os.path.dirname(tx_out_file)
            cores = dd.get_num_cores(items[0])
            ref_file = dd.get_ref_file(items[0])
            bam_args = " ".join(_prepare_smoove_bams(full_bams, sr_bams, disc_bams,
                                                     items, tx_dir))
            std_excludes = ["~^GL", "~^HLA", "~_random", "~^chrUn", "~alt", "~decoy"]
            # Plain prefixes/suffixes behind the standard patterns.
            std_affixes = tuple(pattern.replace("~", "").replace("^", "")
                                for pattern in std_excludes)
            extra_excludes = []
            for contig in ref.file_contigs(ref_file):
                contig_name = contig.name
                if chromhacks.is_nonalt(contig_name):
                    continue
                # Already covered by one of the standard patterns.
                if contig_name.startswith(std_affixes) or contig_name.endswith(std_affixes):
                    continue
                extra_excludes.append(contig_name)
            chrom_arg = "--excludechroms '%s'" % ",".join(std_excludes + extra_excludes)
            if utils.file_exists(sv_exclude_bed):
                bed_arg = "--exclude %s" % sv_exclude_bed
            else:
                bed_arg = ""
            template = ("export TMPDIR={tempdir} && "
                        "smoove call --processes {cores} --genotype --removepr --fasta {ref_file} "
                        "--name {name} --outdir {out_dir} "
                        "{exclude_bed} {exclude_chrs} {full_bams}")
            command = template.format(tempdir=tx_dir, cores=cores, ref_file=ref_file,
                                      name=name, out_dir=tx_dir, exclude_bed=bed_arg,
                                      exclude_chrs=chrom_arg, full_bams=bam_args)
            with utils.chdir(tx_dir):
                try:
                    do.run(command, "smoove lumpy calling", items[0])
                except subprocess.CalledProcessError as msg:
                    if not _allowed_errors(str(msg)):
                        logger.exception()
                        raise
                    sample_names = [dd.get_sample_name(d) for d in items]
                    vcfutils.write_empty_vcf(tx_out_file, config=items[0]["config"],
                                             samples=sample_names)
    vcfutils.bgzip_and_index(out_file, items[0]["config"])
    return out_file, sv_exclude_bed
def _run_smoove(full_bams, sr_bams, disc_bams, work_dir, items):
    """Run lumpy-sv using smoove.

    A ``-prep.vcf.gz`` file named after the first item's alignment BAM is
    returned as-is when it exists. Otherwise smoove runs inside a file
    transaction if the genotyped VCF is missing, and the VCF is then passed
    to ``vcfutils.bgzip_and_index``.

    full_bams, sr_bams, disc_bams -- BAM inputs passed to
        ``_prepare_smoove_bams``.
    work_dir -- directory that receives the output VCF.
    items -- sample data; the first entry supplies name and configuration.

    Returns a tuple of the VCF path and the exclusion BED from
    ``sshared.prepare_exclude_file``.
    Raises subprocess.CalledProcessError when smoove fails with a message
    that ``_allowed_errors`` does not accept.
    """
    batch = sshared.get_cur_batch(items)
    ext = "-%s-svs" % batch if batch else "-svs"
    name = "%s%s" % (dd.get_sample_name(items[0]), ext)
    out_file = os.path.join(work_dir, "%s-smoove.genotyped.vcf.gz" % name)
    sv_exclude_bed = sshared.prepare_exclude_file(items, out_file)
    old_out_file = os.path.join(work_dir, "%s%s-prep.vcf.gz"
                                % (os.path.splitext(os.path.basename(items[0]["align_bam"]))[0], ext))
    if utils.file_exists(old_out_file):
        return old_out_file, sv_exclude_bed
    if not utils.file_exists(out_file):
        with file_transaction(items[0], out_file) as tx_out_file:
            cores = dd.get_num_cores(items[0])
            out_dir = os.path.dirname(tx_out_file)
            ref_file = dd.get_ref_file(items[0])
            full_bams = " ".join(_prepare_smoove_bams(full_bams, sr_bams, disc_bams, items,
                                                      os.path.dirname(tx_out_file)))
            std_excludes = ["~^GL", "~^HLA", "~_random", "~^chrUn", "~alt", "~decoy"]

            def _is_std_exclude(n):
                # True when the name starts or ends with a standard pattern,
                # once its "~" and "^" markers are stripped.
                clean_excludes = [x.replace("~", "").replace("^", "") for x in std_excludes]
                return any([n.startswith(x) or n.endswith(x) for x in clean_excludes])
            exclude_chrs = [c.name for c in ref.file_contigs(ref_file)
                            if not chromhacks.is_nonalt(c.name) and not _is_std_exclude(c.name)]
            exclude_chrs = "--excludechroms '%s'" % ",".join(std_excludes + exclude_chrs)
            exclude_bed = ("--exclude %s" % sv_exclude_bed) if utils.file_exists(sv_exclude_bed) else ""
            tempdir = os.path.dirname(tx_out_file)
            # The template is filled from the local names defined above.
            cmd = ("export TMPDIR={tempdir} && "
                   "smoove call --processes {cores} --genotype --removepr --fasta {ref_file} "
                   "--name {name} --outdir {out_dir} "
                   "{exclude_bed} {exclude_chrs} {full_bams}")
            with utils.chdir(tempdir):
                try:
                    do.run(cmd.format(**locals()), "smoove lumpy calling", items[0])
                except subprocess.CalledProcessError as msg:
                    # Hand over the error text rather than the exception object,
                    # matching how the other callers in this file pass str(msg)
                    # to their error filters.
                    if _allowed_errors(str(msg)):
                        vcfutils.write_empty_vcf(tx_out_file, config=items[0]["config"],
                                                 samples=[dd.get_sample_name(d) for d in items])
                    else:
                        logger.exception()
                        raise
    vcfutils.bgzip_and_index(out_file, items[0]["config"])
    return out_file, sv_exclude_bed
def _run_lumpy(full_bams, sr_bams, disc_bams, work_dir, items):
    """Call structural variants with ``speedseq sv`` (lumpy-sv).

    The ``.sv.bedpe`` output is named after the first item's alignment BAM
    plus a batch-aware suffix. speedseq runs inside a file transaction only
    when that file is missing.

    Returns a tuple of the BEDPE path and the exclusion BED from
    ``sshared.prepare_exclude_file``.
    """
    batch = sshared.get_cur_batch(items)
    if batch:
        ext = "-%s-svs" % batch
    else:
        ext = "-svs"
    bam_base = os.path.splitext(os.path.basename(items[0]["align_bam"]))[0]
    out_file = os.path.join(work_dir, "%s%s.sv.bedpe" % (bam_base, ext))
    sv_exclude_bed = sshared.prepare_exclude_file(items, out_file)
    if not utils.file_exists(out_file):
        with file_transaction(out_file) as tx_out_file:
            with utils.curdir_tmpdir(items[0]) as tmpdir:
                # speedseq receives the output path without the ".sv.bedpe" suffix.
                out_prefix = tx_out_file.replace(".sv.bedpe", "")
                full_csv = ",".join(full_bams)
                sr_csv = ",".join(sr_bams)
                disc_csv = ",".join(disc_bams)
                if sv_exclude_bed:
                    exclude_arg = "-x %s" % sv_exclude_bed
                else:
                    exclude_arg = ""
                reference = dd.get_ref_file(items[0])
                template = ("speedseq sv -v -B {full_bams} -S {sr_bams} -D {disc_bams} -R {ref_file} "
                            "{exclude} -A false -T {tmpdir} -o {out_base}")
                command = template.format(full_bams=full_csv,
                                          sr_bams=sr_csv,
                                          disc_bams=disc_csv,
                                          ref_file=reference,
                                          exclude=exclude_arg,
                                          tmpdir=tmpdir,
                                          out_base=out_prefix)
                do.run(command, "speedseq lumpy", items[0])
    return out_file, sv_exclude_bed
def _run_lumpy(full_bams, sr_bams, disc_bams, work_dir, items):
    """Run lumpy-sv through ``speedseq sv`` and report the BEDPE output.

    The output name combines the first item's alignment BAM base name, a
    batch-aware suffix and ``.sv.bedpe``. An existing output file skips the
    speedseq run.

    Returns ``(out_file, sv_exclude_bed)``.
    """
    batch = sshared.get_cur_batch(items)
    ext = ("-%s-svs" % batch) if batch else "-svs"
    align_bam_name = os.path.basename(items[0]["align_bam"])
    out_name = "%s%s.sv.bedpe" % (os.path.splitext(align_bam_name)[0], ext)
    out_file = os.path.join(work_dir, out_name)
    sv_exclude_bed = sshared.prepare_exclude_file(items, out_file)
    if utils.file_exists(out_file):
        return out_file, sv_exclude_bed
    with file_transaction(items[0], out_file) as tx_out_file:
        with tx_tmpdir(items[0]) as tmpdir:
            # speedseq receives the output path without the ".sv.bedpe" suffix.
            out_prefix = tx_out_file.replace(".sv.bedpe", "")
            joined_full = ",".join(full_bams)
            joined_sr = ",".join(sr_bams)
            joined_disc = ",".join(disc_bams)
            exclude_opt = ("-x %s" % sv_exclude_bed) if sv_exclude_bed else ""
            reference = dd.get_ref_file(items[0])
            cmd = ("speedseq sv -v -B {full_bams} -S {sr_bams} -D {disc_bams} -R {ref_file} "
                   "{exclude} -A false -T {tmpdir} -o {out_base}")
            do.run(cmd.format(full_bams=joined_full, sr_bams=joined_sr,
                              disc_bams=joined_disc, ref_file=reference,
                              exclude=exclude_opt, tmpdir=tmpdir, out_base=out_prefix),
                   "speedseq lumpy", items[0])
    return out_file, sv_exclude_bed
def _run_lumpy(full_bams, sr_bams, disc_bams, work_dir, items):
    """Run lumpyexpress over the supplied BAM sets.

    The VCF name combines the first item's alignment BAM base name with a
    batch-aware suffix. When the VCF already exists, lumpyexpress is not
    executed and only the sorting step runs.

    Returns ``(sorted_vcf, sv_exclude_bed)``, where ``sorted_vcf`` comes from
    ``vcfutils.sort_by_ref``.
    """
    batch = sshared.get_cur_batch(items)
    ext = ("-%s-svs" % batch) if batch else "-svs"
    align_bam_name = os.path.basename(items[0]["align_bam"])
    out_name = "%s%s.vcf" % (os.path.splitext(align_bam_name)[0], ext)
    out_file = os.path.join(work_dir, out_name)
    sv_exclude_bed = sshared.prepare_exclude_file(items, out_file)
    if utils.file_exists(out_file):
        return vcfutils.sort_by_ref(out_file, items[0]), sv_exclude_bed
    with file_transaction(items[0], out_file) as tx_out_file:
        with tx_tmpdir(items[0]) as tmpdir:
            joined_full = ",".join(full_bams)
            joined_sr = ",".join(sr_bams)
            joined_disc = ",".join(disc_bams)
            exclude_opt = ("-x %s" % sv_exclude_bed) if utils.file_exists(sv_exclude_bed) else ""
            # Reference lookup is retained; the command template has no slot for it.
            ref_file = dd.get_ref_file(items[0])
            # use our bcbio python for runs within lumpyexpress
            interpreter_dir = os.path.dirname(sys.executable)
            cmd = ("export PATH={curpython_dir}:$PATH && "
                   "lumpyexpress -v -B {full_bams} -S {sr_bams} -D {disc_bams} "
                   "{exclude} -T {tmpdir} -o {tx_out_file}")
            do.run(cmd.format(curpython_dir=interpreter_dir, full_bams=joined_full,
                              sr_bams=joined_sr, disc_bams=joined_disc,
                              exclude=exclude_opt, tmpdir=tmpdir, tx_out_file=tx_out_file),
                   "lumpyexpress", items[0])
    return vcfutils.sort_by_ref(out_file, items[0]), sv_exclude_bed