def run(items):
    """Run PureCN on a batch and attach its output to the tumor sample.

    Returns `items` untouched when the batch has no somatic pairing.
    Otherwise returns the normal sample (when present) followed by the tumor
    sample; any PureCN output is appended to the tumor's "sv" list.
    """
    # PairedInfo named tuple for one T/N pair (or just T); carries tumor_config
    paired = vcfutils.get_paired(items)
    if not paired:
        sample_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info("Skipping PureCN; no somatic tumor calls in batch: %s" % sample_names)
        return items
    work_dir = _sv_workdir(paired.tumor_data)
    normaldb = tz.get_in(["algorithm", "background", "cnv_reference", "purecn_normaldb"],
                         paired.tumor_config)
    # the right way of running purecn is with normaldb
    if not normaldb:
        purecn_out = _run_purecn(paired, work_dir)
    else:
        purecn_out = _run_purecn_dx(_run_purecn_normaldb(paired, work_dir), paired)
    out = [paired.normal_data] if paired.normal_data else []
    if purecn_out:
        purecn_out["variantcaller"] = "purecn"
        if "loh" in purecn_out:
            # Imported only when LOH calls need converting to VCF
            from bcbio.structural import titancna
            purecn_out["vrn_file"] = titancna.to_vcf(
                purecn_out["loh"], "PureCN", _get_header, _loh_to_vcf,
                paired.tumor_data, sep=",")
            purecn_out["lohsummary"] = loh.summary_status(purecn_out, paired.tumor_data)
        paired.tumor_data.setdefault("sv", []).append(purecn_out)
    out.append(paired.tumor_data)
    return out
def _run_paired(paired):
    """Run the somatic GATK4-CNV calling pipeline for a tumor (and optional normal).

    Models segments from the tumor's normalized depth bins, calls copy numbers
    on them, and appends a "gatk-cnv" entry (call file, VCF, segments, plot)
    to the tumor's "sv" list. Returns the normal sample, when present,
    followed by the tumor sample.
    """
    from bcbio.structural import titancna
    tumor = paired.tumor_data
    work_dir = _sv_workdir(tumor)
    normalized_bins = tz.get_in(["depth", "bins", "normalized"], tumor)
    seg_files = model_segments(normalized_bins, work_dir, paired)
    call_file = call_copy_numbers(seg_files["seg"], work_dir, tumor)
    out = [paired.normal_data] if paired.normal_data else []
    # Make sure the "sv" list exists before the VCF and plot are generated
    sv_calls = tumor.setdefault("sv", [])
    vrn_file = titancna.to_vcf(call_file, "GATK4-CNV", _get_seg_header, _seg_to_vcf, tumor)
    plot = plot_model_segments(seg_files, work_dir, tumor)
    sv_calls.append({
        "variantcaller": "gatk-cnv",
        "call_file": call_file,
        "vrn_file": vrn_file,
        "seg": seg_files["seg"],
        "plot": plot,
    })
    out.append(tumor)
    return out
def run(items):
    """Run PureCN on a batch and attach its output to the tumor sample.

    Returns `items` untouched when the batch has no somatic pairing.
    Otherwise returns the normal sample (when present) followed by the tumor
    sample; any PureCN output is appended to the tumor's "sv" list.
    """
    paired = vcfutils.get_paired(items)
    if not paired:
        sample_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info("Skipping PureCN; no somatic tumor calls in batch: %s" % sample_names)
        return items
    work_dir = _sv_workdir(paired.tumor_data)
    purecn_out = _run_purecn(paired, work_dir)
    # XXX Currently finding edge case failures with Dx calling, needs additional testing
    # purecn_out = _run_purecn_dx(purecn_out, paired)
    out = [paired.normal_data] if paired.normal_data else []
    if purecn_out:
        purecn_out["variantcaller"] = "purecn"
        if "loh" in purecn_out:
            # Imported only when LOH calls need converting to VCF
            from bcbio.structural import titancna
            purecn_out["vrn_file"] = titancna.to_vcf(
                purecn_out["loh"], "PureCN", _get_header, _loh_to_vcf,
                paired.tumor_data, sep=",")
            purecn_out["lohsummary"] = loh.summary_status(purecn_out, paired.tumor_data)
        paired.tumor_data.setdefault("sv", []).append(purecn_out)
    out.append(paired.tumor_data)
    return out
def run(items):
    """Run PureCN for a batch and record the result on the tumor sample.

    When the batch has no somatic pairing, the input `items` are returned
    as-is. Otherwise the result is `[normal, tumor]`, or `[tumor]` when there
    is no normal, with any PureCN output added to the tumor's "sv" list.
    """
    paired = vcfutils.get_paired(items)
    if not paired:
        batch_names = [dd.get_sample_name(d) for d in items]
        logger.info("Skipping PureCN; no somatic tumor calls in batch: %s" % " ".join(batch_names))
        return items
    tumor = paired.tumor_data
    purecn_out = _run_purecn(paired, _sv_workdir(tumor))
    # XXX Currently finding edge case failures with Dx calling, needs additional testing
    # purecn_out = _run_purecn_dx(purecn_out, paired)
    if purecn_out:
        purecn_out["variantcaller"] = "purecn"
        if "loh" in purecn_out:
            # Imported only when LOH calls need converting to VCF
            from bcbio.structural import titancna
            loh_vcf = titancna.to_vcf(purecn_out["loh"], "PureCN", _get_header, _loh_to_vcf,
                                      tumor, sep=",")
            purecn_out["vrn_file"] = loh_vcf
            purecn_out["lohsummary"] = loh.summary_status(purecn_out, tumor)
        if "sv" not in tumor:
            tumor["sv"] = []
        tumor["sv"].append(purecn_out)
    normal = paired.normal_data
    return [normal, tumor] if normal else [tumor]
def _run_purple(paired, het_file, depth_file, vrn_files, work_dir):
    """Run PURPLE with pre-calculated AMBER and COBALT compatible inputs.

    Args:
        paired: tumor/normal pairing; `tumor_data` and `normal_data` are read.
        het_file: BAF file; its directory is passed to PURPLE as `-amber`.
        depth_file: depth file; its directory is passed to PURPLE as `-cobalt`.
        vrn_files: optional list of variant call dicts; the "vrn_file" of the
            first entry is passed as `-somatic_vcf`.
        work_dir: run directory; outputs go to its "purple" subdirectory.

    Returns:
        dict with "variantcaller", "call_file", "vrn_file", "plot" (plot name
        -> png path, only for plots that exist) and "metrics" (purity file
        column -> value, converted to float where possible).
    """
    tumor_name = dd.get_sample_name(paired.tumor_data)
    purple_dir = utils.safe_makedir(os.path.join(work_dir, "purple"))
    out_file = os.path.join(purple_dir, "%s.purple.cnv" % tumor_name)
    if not utils.file_exists(out_file):
        with file_transaction(paired.tumor_data, out_file) as tx_out_file:
            tx_dir = os.path.dirname(tx_out_file)
            cmd = ["PURPLE"] + _get_jvm_opts(tx_out_file, paired.tumor_data) + \
                  ["-amber", os.path.dirname(het_file), "-baf", het_file,
                   "-cobalt", os.path.dirname(depth_file),
                   "-gc_profile", dd.get_variation_resources(paired.tumor_data)["gc_profile"],
                   "-output_dir", tx_dir,
                   "-ref_genome", "hg38" if dd.get_genome_build(paired.tumor_data) == "hg38" else "hg19",
                   "-run_dir", work_dir,
                   "-threads", dd.get_num_cores(paired.tumor_data),
                   "-tumor_sample", tumor_name,
                   "-ref_sample", dd.get_sample_name(paired.normal_data)]
            if vrn_files:
                cmd += ["-somatic_vcf", vrn_files[0]["vrn_file"]]
            # Avoid X11 display errors when writing plots
            cmd = "unset DISPLAY && %s" % " ".join([str(x) for x in cmd])
            do.run(cmd, "PURPLE: purity and ploidy estimation")
            # Move every other file PURPLE wrote into the final directory;
            # tx_out_file itself is left for file_transaction to handle.
            tx_base = os.path.basename(tx_out_file)
            for fname in os.listdir(tx_dir):
                if fname != tx_base:
                    shutil.move(os.path.join(tx_dir, fname), os.path.join(purple_dir, fname))
    out_file_export = os.path.join(purple_dir, "%s-purple-cnv.tsv" % tumor_name)
    if not utils.file_exists(out_file_export):
        utils.symlink_plus(out_file, out_file_export)
    out = {"variantcaller": "purple", "call_file": out_file_export,
           "vrn_file": titancna.to_vcf(out_file_export, "PURPLE", _get_header, _export_to_vcf,
                                       paired.tumor_data),
           "plot": {}, "metrics": {}}
    for name, ext in [("copy_number", "copyNumber"), ("minor_allele", "minor_allele"),
                      ("variant", "variant")]:
        plot_file = os.path.join(purple_dir, "plot", "%s.%s.png" % (tumor_name, ext))
        if os.path.exists(plot_file):
            out["plot"][name] = plot_file
    purity_file = os.path.join(purple_dir, "%s.purple.purity" % tumor_name)
    with open(purity_file) as in_handle:
        # readline() keeps the line ending; strip it so the last column's name
        # and value do not end up with a trailing newline.
        header = in_handle.readline().rstrip("\r\n").replace("#", "").split("\t")
        vals = in_handle.readline().rstrip("\r\n").split("\t")
    for h, v in zip(header, vals):
        try:
            v = float(v)
        except ValueError:
            # Non-numeric columns are kept as strings
            pass
        out["metrics"][h] = v
    return out
def _run_paired(paired):
    """Run somatic copy number calling (GATK4-CNV) on a tumor/normal pairing.

    Segments are modelled from the tumor's normalized depth bins and copy
    numbers are called on them. The resulting "gatk-cnv" record is added to
    the tumor's "sv" list, and the samples are returned as `[normal, tumor]`,
    or `[tumor]` when there is no normal.
    """
    from bcbio.structural import titancna
    tumor_data = paired.tumor_data
    work_dir = _sv_workdir(tumor_data)
    depth_bins = tz.get_in(["depth", "bins", "normalized"], tumor_data)
    seg_files = model_segments(depth_bins, work_dir, paired)
    seg_file = seg_files["seg"]
    call_file = call_copy_numbers(seg_file, work_dir, tumor_data)
    samples = []
    if paired.normal_data:
        samples.append(paired.normal_data)
    # Create the "sv" list up front, before the VCF and plot are produced
    if "sv" not in tumor_data:
        tumor_data["sv"] = []
    cnv_call = {"variantcaller": "gatk-cnv", "call_file": call_file}
    cnv_call["vrn_file"] = titancna.to_vcf(call_file, "GATK4-CNV", _get_seg_header,
                                           _seg_to_vcf, tumor_data)
    cnv_call["seg"] = seg_file
    cnv_call["plot"] = plot_model_segments(seg_files, work_dir, tumor_data)
    tumor_data["sv"].append(cnv_call)
    samples.append(tumor_data)
    return samples
def _run_purple(paired, het_file, depth_file, vrn_files, work_dir):
    """Run PURPLE with pre-calculated AMBER and COBALT compatible inputs.

    Args:
        paired: tumor/normal pairing; `tumor_data` and `normal_data` are read.
        het_file: BAF file; its directory is passed to PURPLE as `-amber`.
        depth_file: depth file; its directory is passed to PURPLE as `-cobalt`.
        vrn_files: optional list of variant call dicts; the "vrn_file" of the
            first entry is passed as `-somatic_vcf`.
        work_dir: run directory; outputs go to its "purple" subdirectory.

    Returns:
        dict with "variantcaller", "call_file", "vrn_file", "plot" (plot name
        -> png path, only for plots that exist) and "metrics" (purity file
        column -> value, converted to float where possible).
    """
    tumor_name = dd.get_sample_name(paired.tumor_data)
    purple_dir = utils.safe_makedir(os.path.join(work_dir, "purple"))
    out_file = os.path.join(purple_dir, "%s.purple.cnv" % tumor_name)
    if not utils.file_exists(out_file):
        with file_transaction(paired.tumor_data, out_file) as tx_out_file:
            tx_dir, tx_base = os.path.split(tx_out_file)
            ref_genome = "hg38" if dd.get_genome_build(paired.tumor_data) == "hg38" else "hg19"
            cmd = ["PURPLE"] + _get_jvm_opts(tx_out_file, paired.tumor_data) + [
                "-amber", os.path.dirname(het_file),
                "-baf", het_file,
                "-cobalt", os.path.dirname(depth_file),
                "-gc_profile", dd.get_variation_resources(paired.tumor_data)["gc_profile"],
                "-output_dir", tx_dir,
                "-ref_genome", ref_genome,
                "-run_dir", work_dir,
                "-threads", dd.get_num_cores(paired.tumor_data),
                "-tumor_sample", tumor_name,
                "-ref_sample", dd.get_sample_name(paired.normal_data),
            ]
            if vrn_files:
                cmd += ["-somatic_vcf", vrn_files[0]["vrn_file"]]
            # Avoid X11 display errors when writing plots
            cmd = "unset DISPLAY && %s" % " ".join([str(x) for x in cmd])
            do.run(cmd, "PURPLE: purity and ploidy estimation")
            # Relocate the side outputs from the transaction directory; the
            # transactional output file itself is skipped here.
            for fname in os.listdir(tx_dir):
                if fname != tx_base:
                    shutil.move(os.path.join(tx_dir, fname), os.path.join(purple_dir, fname))
    out_file_export = os.path.join(purple_dir, "%s-purple-cnv.tsv" % tumor_name)
    if not utils.file_exists(out_file_export):
        utils.symlink_plus(out_file, out_file_export)
    out = {
        "variantcaller": "purple",
        "call_file": out_file_export,
        "vrn_file": titancna.to_vcf(out_file_export, "PURPLE", _get_header, _export_to_vcf,
                                    paired.tumor_data),
        "plot": {},
        "metrics": {},
    }
    plot_exts = [("copy_number", "copyNumber"), ("minor_allele", "minor_allele"),
                 ("variant", "variant")]
    for name, ext in plot_exts:
        plot_file = os.path.join(purple_dir, "plot", "%s.%s.png" % (tumor_name, ext))
        if os.path.exists(plot_file):
            out["plot"][name] = plot_file
    purity_file = os.path.join(purple_dir, "%s.purple.purity" % tumor_name)
    with open(purity_file) as in_handle:
        # Drop line endings before splitting: otherwise the final header name
        # (and a non-numeric final value) would keep a trailing newline.
        header = in_handle.readline().rstrip("\r\n").replace("#", "").split("\t")
        vals = in_handle.readline().rstrip("\r\n").split("\t")
    for h, v in zip(header, vals):
        try:
            v = float(v)
        except ValueError:
            # Non-numeric columns are kept as strings
            pass
        out["metrics"][h] = v
    return out