def _check_for_problem_somatic_batches(items, config):
    """Reject batch layouts that the somatic callers cannot handle.

    Every sample is deep-copied and given its custom-merged configuration,
    then grouped by batch name. Tumor/normal batches are handed to
    ``vcfutils.check_paired_problems``. Unpaired batches holding more than one
    sample raise ``ValueError`` when a configured variant caller starts with
    "vardict" or equals "mutect" (both compared case-insensitively).
    """
    def _with_custom_config(sample):
        # Copy first so the caller's sample dictionaries are never modified.
        sample = copy.deepcopy(sample)
        sample["config"] = config_utils.update_w_custom(config, sample)
        return sample

    def _names(samples):
        return [dd.get_sample_name(x) for x in samples]

    prepared = [_with_custom_config(x) for x in items]
    samples_by_batch = collections.defaultdict(list)
    for sample in prepared:
        for batch_name in dd.get_batches(sample) or []:
            samples_by_batch[batch_name].append(sample)

    for batch_name, members in samples_by_batch.items():
        if vcfutils.get_paired(members):
            vcfutils.check_paired_problems(members)
            continue
        if len(members) <= 1:
            continue
        callers = set(tz.concat([dd.get_variantcaller(x) or [] for x in members]))
        if any(c.lower().startswith("vardict") for c in callers):
            raise ValueError("VarDict does not support pooled non-tumor/normal calling, in batch %s: %s"
                             % (batch_name, _names(members)))
        if any(c.lower() == "mutect" for c in callers):
            raise ValueError("Mutect requires a 'phenotype: tumor' sample for calling, in batch %s: %s"
                             % (batch_name, _names(members)))
def _check_for_problem_somatic_batches(items, config):
    """Flag batch setups that somatic calling does not support.

    Samples are copied, updated with their custom configuration and grouped by
    batch. Paired tumor/normal batches go through
    ``vcfutils.check_paired_problems``; any other batch with several samples
    raises ``ValueError`` if one of its variant callers starts with "vardict"
    (case-insensitive).
    """
    grouped = collections.defaultdict(list)
    configured = []
    for original in items:
        # Deep copy keeps the input samples untouched by the config update.
        sample = copy.deepcopy(original)
        sample["config"] = config_utils.update_w_custom(config, sample)
        configured.append(sample)
    for sample in configured:
        for batch_name in dd.get_batches(sample) or []:
            grouped[batch_name].append(sample)

    for batch_name, members in grouped.items():
        if vcfutils.get_paired(members):
            vcfutils.check_paired_problems(members)
        elif len(members) > 1:
            callers = set(tz.concat([dd.get_variantcaller(x) or [] for x in members]))
            uses_vardict = any(c.lower().startswith("vardict") for c in callers)
            if uses_vardict:
                sample_names = [dd.get_sample_name(x) for x in members]
                raise ValueError(
                    "VarDict does not support pooled non-tumor/normal calling, in batch %s: %s"
                    % (batch_name, sample_names))
def run(items):
    """Call structural variants with Manta and attach per-sample results.

    The Manta workflow runs only when its output file is missing. Each sample
    then receives its own selected VCF, annotated with snpEff effects when
    available, appended to its "sv" list. Returns the list of samples.
    """
    paired = vcfutils.get_paired(items)
    lead = paired.tumor_data if paired else items[0]
    work_dir = _sv_workdir(lead)
    variant_file = _get_out_file(work_dir, paired)
    if not utils.file_exists(variant_file):
        with file_transaction(lead, work_dir) as tx_work_dir:
            utils.safe_makedir(tx_work_dir)
            tx_workflow_file = _prep_config(items, paired, tx_work_dir)
            _run_workflow(items, paired, tx_workflow_file, tx_work_dir)
    assert utils.file_exists(variant_file), "Manta finished without output file %s" % variant_file

    out = []
    for sample in items:
        sample_file = _select_sample(sample, variant_file, work_dir)
        sample.setdefault("sv", [])
        effects_vcf, _ = effects.add_to_vcf(sample_file, sample, "snpeff")
        # Fall back to the unannotated file when no effects VCF is produced.
        call = {"variantcaller": "manta",
                "vrn_file": effects_vcf or sample_file}
        sample["sv"].append(call)
        out.append(sample)
    return out
def get_qc_tools(data):
    """Retrieve a list of QC tools to use based on configuration and analysis type.

    Uses defaults if previously set.

    Returns the explicitly configured QC list when ``dd.get_algorithm_qc`` is
    truthy; otherwise builds the list from the lower-cased ``data["analysis"]``
    value and the tools_on/tools_off settings.

    NOTE(review): the nesting below was reconstructed from a flattened source;
    confirm that "kraken" belongs under the non-smallRNA branch and that
    "viral"/"damage" belong under the variant branch.
    """
    # An explicit QC specification overrides all defaults.
    if dd.get_algorithm_qc(data):
        return dd.get_algorithm_qc(data)
    analysis = data["analysis"].lower()
    to_run = []
    if "fastqc" not in dd.get_tools_off(data):
        to_run.append("fastqc")
    # Either qualimap flag in tools_on enables the standard qualimap run.
    if any([tool in dd.get_tools_on(data) for tool in ["qualimap", "qualimap_full"]]):
        to_run.append("qualimap")
    if analysis.startswith("rna-seq"):
        if "qualimap" not in dd.get_tools_off(data):
            if gtf.is_qualimap_compatible(dd.get_gtf_file(data)):
                to_run.append("qualimap_rnaseq")
            else:
                logger.debug("GTF not compatible with Qualimap, skipping.")
    if analysis.startswith("smallrna-seq"):
        to_run.append("small-rna")
    if not analysis.startswith("smallrna-seq"):
        to_run.append("samtools")
        if tz.get_in(["config", "algorithm", "kraken"], data):
            to_run.append("kraken")
    if analysis.startswith(("standard", "variant", "variant2")):
        to_run += ["qsignature", "coverage", "variants", "picard"]
        # The single sample is wrapped in a list for the paired/damage checks.
        if vcfutils.get_paired([data]):
            to_run += ["viral"]
        if damage.should_filter([data]):
            to_run += ["damage"]
    if dd.get_umi_consensus(data):
        to_run += ["umi"]
    return to_run
def run(items):
    """Call structural variants with Manta, annotate depth and finalize per sample.

    The workflow runs only when the expected output file is missing. The calls
    are depth-annotated, optionally passed through BreakPointInspector for the
    tumor of a tumor/normal pair, and finalized for each sample. Only the first
    sample sharing a final VCF is flagged for upload.
    """
    paired = vcfutils.get_paired(items)
    lead = paired.tumor_data if paired else items[0]
    work_dir = _sv_workdir(lead)
    variant_file = _get_out_file(work_dir, paired)
    if not utils.file_exists(variant_file):
        with file_transaction(lead, work_dir) as tx_work_dir:
            utils.safe_makedir(tx_work_dir)
            tx_workflow_file = _prep_config(items, paired, tx_work_dir)
            _run_workflow(items, paired, tx_workflow_file, tx_work_dir)
    assert utils.file_exists(variant_file), "Manta finished without output file %s" % variant_file
    variant_file = shared.annotate_with_depth(variant_file, items)

    out = []
    upload_counts = collections.defaultdict(int)
    for sample in items:
        wants_bpi = "break-point-inspector" in dd.get_tools_on(sample)
        if (wants_bpi and paired and paired.normal_bam
                and paired.tumor_name == dd.get_sample_name(sample)):
            variant_file = _run_break_point_inspector(sample, variant_file, paired, work_dir)
        sample.setdefault("sv", [])
        final_vcf = shared.finalize_sv(variant_file, sample, items)
        # only upload a single file per batch
        first_for_file = upload_counts[final_vcf] == 0
        call = {"variantcaller": "manta",
                "do_upload": first_for_file,
                "vrn_file": final_vcf}
        evidence_bam = _get_evidence_bam(work_dir, sample)
        if evidence_bam:
            call["read_evidence"] = evidence_bam
        sample["sv"].append(call)
        upload_counts[final_vcf] += 1
        out.append(sample)
    return out
def split_somatic(items):
    """Split somatic batches, adding a germline target.

    Normals from tumor/normal pairs whose variantcaller is a dictionary with a
    "germline" entry are copied into separate germline samples (once per
    description). Somatic samples with a "somatic" entry are switched to that
    caller. Returns non-somatic, somatic and new germline samples, in order.
    """
    items = [_clean_flat_variantcaller(x) for x in items]
    somatic_groups, somatic, non_somatic = vcfutils.somatic_batches(items)

    # extract germline samples to run from normals in tumor/normal pairs
    seen_descriptions = set()
    germline = []
    for group in somatic_groups:
        paired = vcfutils.get_paired(group)
        if not (paired and paired.normal_data):
            continue
        normal = utils.deepish_copy(paired.normal_data)
        callers = dd.get_variantcaller(normal)
        if not (isinstance(callers, dict) and "germline" in callers):
            continue
        description = normal["description"]
        if description in seen_descriptions:
            continue
        seen_descriptions.add(description)
        normal["rgnames"]["sample"] = description
        normal["metadata"]["batch"] = "%s-germline" % description
        normal["metadata"]["phenotype"] = "germline"
        normal = remove_align_qc_tools(normal)
        normal["config"]["algorithm"]["variantcaller"] = callers["germline"]
        germline.append(normal)

    # Fix variantcalling specification for only somatic targets
    somatic_out = []
    for sample in somatic:
        callers = dd.get_variantcaller(sample)
        if isinstance(callers, dict) and "somatic" in callers:
            sample["config"]["algorithm"]["variantcaller"] = callers["somatic"]
        somatic_out.append(sample)
    return non_somatic + somatic_out + germline
def run(items):
    """Run PureCN on the tumor of a somatic batch and attach results to it.

    Batches without a tumor are returned unchanged after logging. Otherwise the
    returned list holds the normal sample (when present) followed by the tumor
    sample, whose "sv" list gains the PureCN output when one was produced.
    """
    paired = vcfutils.get_paired(items)
    if not paired:
        sample_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info("Skipping PureCN; no somatic tumor calls in batch: %s" % sample_names)
        return items
    tumor = paired.tumor_data
    work_dir = _sv_workdir(tumor)
    purecn_out = _run_purecn(paired, work_dir)
    # XXX Dx calling (_run_purecn_dx) is left out: it shows edge case failures
    # and needs additional testing.
    out = [paired.normal_data] if paired.normal_data else []
    if purecn_out:
        purecn_out["variantcaller"] = "purecn"
        if "loh" in purecn_out:
            from bcbio.structural import titancna
            purecn_out["vrn_file"] = titancna.to_vcf(
                purecn_out["loh"], "PureCN", _get_header, _loh_to_vcf, tumor, sep=",")
            purecn_out["lohsummary"] = loh.summary_status(purecn_out, tumor)
        tumor.setdefault("sv", []).append(purecn_out)
    out.append(tumor)
    return out
def run(items):
    """Call structural variants with Manta and finalize results per sample.

    The workflow runs only when the expected output file is missing. For
    tumor/normal pairs with a normal BAM, samples enabling
    "break-point-inspector" get the calls post-processed before finalization.
    """
    paired = vcfutils.get_paired(items)
    lead = paired.tumor_data if paired else items[0]
    work_dir = _sv_workdir(lead)
    variant_file = _get_out_file(work_dir, paired)
    if not utils.file_exists(variant_file):
        with file_transaction(lead, work_dir) as tx_work_dir:
            utils.safe_makedir(tx_work_dir)
            tx_workflow_file = _prep_config(items, paired, tx_work_dir)
            _run_workflow(items, paired, tx_workflow_file, tx_work_dir)
    assert utils.file_exists(variant_file), "Manta finished without output file %s" % variant_file

    out = []
    for sample in items:
        has_normal_bam = paired and paired.normal_bam
        if has_normal_bam and "break-point-inspector" in dd.get_tools_on(sample):
            variant_file = _run_break_point_inspector(sample, variant_file, paired)
        sample.setdefault("sv", [])
        final_vcf = shared.finalize_sv(variant_file, sample, items)
        sample["sv"].append({"variantcaller": "manta", "vrn_file": final_vcf})
        out.append(sample)
    return out
def population_variant_regions(items, merged=False):
    """Retrieve the variant region BED file from a population of items.

    A single sample returns its own regions and a tumor/normal pair returns the
    tumor regions. For a population the BED file covering the most bases is
    returned, or None when no sample has regions.
    """
    def _regions_for(sample):
        regions = dd.get_variant_regions(sample) or dd.get_sample_callable(sample)
        # Only variant region inputs need merging; callable BED regions
        # do not overlap.
        if not (merged and dd.get_variant_regions(sample)):
            return regions
        pre_merged = dd.get_variant_regions_merged(sample)
        return pre_merged if pre_merged else merge_overlaps(regions, sample)

    import pybedtools
    if len(items) == 1:
        return _regions_for(items[0])
    paired = vcfutils.get_paired(items)
    if paired:
        return _regions_for(paired.tumor_data)
    by_coverage = []
    for sample in items:
        bed_file = _regions_for(sample)
        if bed_file:
            covered = pybedtools.BedTool(bed_file).total_coverage()
            by_coverage.append((covered, bed_file))
    by_coverage.sort(reverse=True)
    return by_coverage[0][1] if by_coverage else None
def annotate_with_depth(in_file, items):
    """Annotate called VCF file with depth using duphold (https://github.com/brentp/duphold)

    Uses the single sample's BAM, or the tumor BAM of a tumor/normal batch.
    Returns the duphold-annotated VCF, or ``in_file`` unchanged when no BAM
    applies.
    """
    if len(items) == 1:
        bam_file = dd.get_align_bam(items[0])
    else:
        paired = vcfutils.get_paired(items)
        bam_file = paired.tumor_bam if paired else None
    if not bam_file:
        return in_file

    lead = items[0]
    out_file = "%s-duphold.vcf.gz" % utils.splitext_plus(in_file)[0]
    if not utils.file_exists(out_file):
        with file_transaction(lead, out_file) as tx_out_file:
            if not in_file.endswith(".gz"):
                in_file = vcfutils.bgzip_and_index(in_file, remove_orig=False,
                                                   out_dir=os.path.dirname(tx_out_file))
            ref_file = dd.get_ref_file(lead)
            # cores for BAM reader thread, so max out at 4 based on recommendations
            cores = min(dd.get_num_cores(lead), 4)
            cmd = ("duphold --threads {cores} --vcf {in_file} --bam {bam_file} --fasta {ref_file} "
                   "-o {tx_out_file}").format(cores=cores, in_file=in_file, bam_file=bam_file,
                                              ref_file=ref_file, tx_out_file=tx_out_file)
            do.run(cmd, "Annotate SV depth with duphold")
    vcfutils.bgzip_and_index(out_file)
    return out_file
def run(items):
    """Run TitanCNA on the tumor sample of a somatic batch.

    Returns ``items`` unchanged (after logging) when the batch has no tumor or
    when ``_should_run`` rejects the heterozygous variant input. Otherwise
    returns the normal sample (if present) followed by the tumor sample, with
    the finalized TitanCNA solution appended to the tumor's "sv" list.
    """
    # Imported inside the function rather than at module level.
    from bcbio import heterogeneity
    paired = vcfutils.get_paired(items)
    if not paired:
        logger.info("Skipping TitanCNA; no somatic tumor calls in batch: %s" %
                    " ".join([dd.get_sample_name(d) for d in items]))
        return items
    work_dir = _sv_workdir(paired.tumor_data)
    cn_file = _titan_cn_file(dd.get_normalized_depth(paired.tumor_data), work_dir, paired.tumor_data)
    het_file = _titan_het_file(heterogeneity.get_variants(paired.tumor_data), work_dir, paired)
    if _should_run(het_file):
        ploidy_outdirs = []
        # Try each ploidy / cluster-count combination.
        for ploidy in [2, 3, 4]:
            for num_clusters in [1, 2, 3]:
                out_dir = _run_titancna(cn_file, het_file, ploidy, num_clusters, work_dir, paired.tumor_data)
            # NOTE(review): nesting reconstructed from a flattened source; this
            # records one output directory per ploidy. Confirm it is not meant
            # to sit inside the cluster loop.
            ploidy_outdirs.append((ploidy, out_dir))
        solution_file = _run_select_solution(ploidy_outdirs, work_dir, paired.tumor_data)
    else:
        logger.info("Skipping TitanCNA; not enough input data: %s" %
                    " ".join([dd.get_sample_name(d) for d in items]))
        return items
    out = []
    if paired.normal_data:
        out.append(paired.normal_data)
    if "sv" not in paired.tumor_data:
        paired.tumor_data["sv"] = []
    paired.tumor_data["sv"].append(_finalize_sv(solution_file, paired.tumor_data))
    out.append(paired.tumor_data)
    return out
def _check_for_problem_somatic_batches(items, config):
    """Identify batch setups that somatic calling cannot process.

    Samples are copied, updated with custom configuration and grouped by batch.
    Tumor/normal batches are validated by ``vcfutils.check_paired_problems``.
    Other multi-sample batches raise ``ValueError`` when their somatic variant
    callers include "vardict", "mutect" or "mutect2".
    """
    def _prepare(sample):
        # Copy so the configuration update never leaks into the caller's data.
        sample = copy.deepcopy(sample)
        sample["config"] = config_utils.update_w_custom(config, sample)
        return sample

    prepared = [_prepare(x) for x in items]
    samples_by_batch = collections.defaultdict(list)
    for sample in prepared:
        for batch_name in dd.get_batches(sample) or []:
            samples_by_batch[batch_name].append(sample)

    for batch_name, members in samples_by_batch.items():
        if vcfutils.get_paired(members):
            vcfutils.check_paired_problems(members)
            continue
        if len(members) <= 1:
            continue
        callers = vcfutils.get_somatic_variantcallers(members)
        if "vardict" in callers:
            raise ValueError(
                "VarDict does not support pooled non-tumor/normal calling, in batch %s: %s"
                % (batch_name, [dd.get_sample_name(x) for x in members]))
        if "mutect" in callers or "mutect2" in callers:
            raise ValueError(
                "MuTect and MuTect2 require a 'phenotype: tumor' sample for calling, "
                "in batch %s: %s" % (batch_name, [dd.get_sample_name(x) for x in members]))
def run(items):
    """Estimate purity and copy number with PureCN for a somatic batch.

    Returns ``items`` untouched when no tumor is present. Otherwise returns the
    normal sample (if any) and then the tumor sample; a non-empty PureCN result
    is tagged with its caller name, given LOH outputs when available and
    appended to the tumor's "sv" list.
    """
    paired = vcfutils.get_paired(items)
    if not paired:
        logger.info("Skipping PureCN; no somatic tumor calls in batch: %s"
                    % " ".join([dd.get_sample_name(d) for d in items]))
        return items

    tumor_data = paired.tumor_data
    purecn_out = _run_purecn(paired, _sv_workdir(tumor_data))
    # XXX Dx calling (_run_purecn_dx) stays disabled: edge case failures were
    # seen and it needs additional testing.
    out = []
    if paired.normal_data:
        out.append(paired.normal_data)
    if purecn_out:
        purecn_out["variantcaller"] = "purecn"
        if "loh" in purecn_out:
            from bcbio.structural import titancna
            loh_vcf = titancna.to_vcf(purecn_out["loh"], "PureCN", _get_header, _loh_to_vcf,
                                      tumor_data, sep=",")
            purecn_out["vrn_file"] = loh_vcf
            purecn_out["lohsummary"] = loh.summary_status(purecn_out, tumor_data)
        if "sv" not in tumor_data:
            tumor_data["sv"] = []
        tumor_data["sv"].append(purecn_out)
    out.append(tumor_data)
    return out
def _cnvkit_segment(cnr_file, cov_interval, data, items, out_file=None):
    """Perform segmentation and copy number calling on normalized inputs.

    Writes a header-only segment file when the input has no values; otherwise
    runs ``cnvkit segment``. Nothing is done if the output is already up to
    date. Returns the path of the segment file.
    """
    if not out_file:
        out_file = "%s.cns" % os.path.splitext(cnr_file)[0]
    if utils.file_uptodate(out_file, cnr_file):
        return out_file

    with file_transaction(data, out_file) as tx_out_file:
        if _cna_has_values(cnr_file):
            cmd = [_get_cmd(), "segment", "-p", str(dd.get_cores(data)),
                   "-o", tx_out_file, cnr_file]
            small_vrn_files = _compatible_small_variants(data, items)
            if small_vrn_files and _cna_has_values(cnr_file) and cov_interval != "genome":
                first = small_vrn_files[0]
                cmd += ["--vcf", first.name, "--sample-id", first.sample]
                if first.normal:
                    cmd += ["--normal-id", first.normal]
            if cov_interval == "genome":
                cmd += ["--threshold", "0.00001"]
            # For tumors, remove very low normalized regions, avoiding upcaptured noise
            # https://github.com/chapmanb/bcbio-nextgen/issues/2171#issuecomment-348333650
            if vcfutils.get_paired(items):
                cmd += ["--drop-low-coverage"]
            # preferentially use conda installed Rscript
            export_cmd = "%s && export TMPDIR=%s && " % (utils.get_R_exports(),
                                                         os.path.dirname(tx_out_file))
            do.run(export_cmd + " ".join(cmd), "CNVkit segment")
        else:
            # No usable values: emit only the column header.
            with open(tx_out_file, "w") as out_handle:
                out_handle.write("chromosome\tstart\tend\tgene\tlog2\tprobes\tCN1\tCN2\tbaf\tweight\n")
    return out_file
def population_variant_regions(items, merged=False):
    """Retrieve the variant region BED file from a population of items.

    A single sample returns its own regions and a tumor/normal pair returns the
    tumor regions. For a population the BED file covering the most bases is
    returned, or None when no sample has regions. With ``merged`` the
    pre-merged regions are preferred, falling back to merging overlaps.
    """
    def _regions_for(sample):
        regions = dd.get_variant_regions(sample) or dd.get_sample_callable(sample)
        if not merged:
            return regions
        pre_merged = dd.get_variant_regions_merged(sample)
        return pre_merged if pre_merged else merge_overlaps(regions, sample)

    import pybedtools
    if len(items) == 1:
        return _regions_for(items[0])
    paired = vcfutils.get_paired(items)
    if paired:
        return _regions_for(paired.tumor_data)
    ranked = []
    for sample in items:
        bed_file = _regions_for(sample)
        if bed_file:
            ranked.append((pybedtools.BedTool(bed_file).total_coverage(), bed_file))
    # Largest total coverage first.
    ranked.sort(reverse=True)
    return ranked[0][1] if ranked else None
def split_somatic(items):
    """Split somatic batches, adding a germline target.

    Normals from tumor/normal pairs whose variantcaller is a dictionary with a
    "germline" entry are copied into "<description>-germline" samples without a
    batch. Somatic samples with a "somatic" entry are switched to that caller.
    Returns non-somatic, somatic and new germline samples, in order.
    """
    somatic_groups, somatic, non_somatic = vcfutils.somatic_batches(items)

    # extract germline samples to run from normals in tumor/normal pairs
    seen_descriptions = set()
    germline = []
    for group in somatic_groups:
        paired = vcfutils.get_paired(group)
        if not (paired and paired.normal_data):
            continue
        normal = utils.deepish_copy(paired.normal_data)
        callers = dd.get_variantcaller(normal)
        if not (isinstance(callers, dict) and "germline" in callers):
            continue
        germline_name = "%s-germline" % normal["description"]
        normal["description"] = germline_name
        if germline_name in seen_descriptions:
            continue
        seen_descriptions.add(germline_name)
        normal["rgnames"]["sample"] = germline_name
        del normal["metadata"]["batch"]
        normal["metadata"]["phenotype"] = "germline"
        normal = remove_align_qc_tools(normal)
        normal["config"]["algorithm"]["variantcaller"] = callers["germline"]
        germline.append(normal)

    # Fix variantcalling specification for only somatic targets
    somatic_out = []
    for sample in somatic:
        callers = dd.get_variantcaller(sample)
        if isinstance(callers, dict) and "somatic" in callers:
            sample["config"]["algorithm"]["variantcaller"] = callers["somatic"]
        somatic_out.append(sample)
    return non_somatic + somatic_out + germline
def run(items):
    """Run PureCN for a somatic batch, preferring a configured normal database.

    ``paired`` is the PairedInfo named tuple for one tumor/normal pair (or a
    tumor alone). With a ``purecn_normaldb`` background the normaldb workflow
    plus Dx calling is used; otherwise the plain PureCN run. Returns the normal
    sample (if present) followed by the tumor sample.
    """
    paired = vcfutils.get_paired(items)
    if not paired:
        sample_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info("Skipping PureCN; no somatic tumor calls in batch: %s" % sample_names)
        return items

    tumor = paired.tumor_data
    work_dir = _sv_workdir(tumor)
    normaldb_keys = ["algorithm", "background", "cnv_reference", "purecn_normaldb"]
    # the right way of running purecn is with normaldb
    if tz.get_in(normaldb_keys, paired.tumor_config):
        purecn_out = _run_purecn_dx(_run_purecn_normaldb(paired, work_dir), paired)
    else:
        purecn_out = _run_purecn(paired, work_dir)

    out = [paired.normal_data] if paired.normal_data else []
    if purecn_out:
        purecn_out["variantcaller"] = "purecn"
        if "loh" in purecn_out:
            from bcbio.structural import titancna
            purecn_out["vrn_file"] = titancna.to_vcf(
                purecn_out["loh"], "PureCN", _get_header, _loh_to_vcf, tumor, sep=",")
            purecn_out["lohsummary"] = loh.summary_status(purecn_out, tumor)
        tumor.setdefault("sv", []).append(purecn_out)
    out.append(tumor)
    return out
def annotate_with_depth(in_file, items):
    """Annotate called VCF file with depth using duphold (https://github.com/brentp/duphold)

    Currently annotates single sample and tumor samples in somatic analysis;
    other inputs return ``in_file`` unchanged.
    """
    bam_file = None
    if len(items) == 1:
        bam_file = dd.get_align_bam(items[0])
    else:
        paired = vcfutils.get_paired(items)
        if paired:
            bam_file = paired.tumor_bam
    if not bam_file:
        return in_file

    first_item = items[0]
    out_file = "%s-duphold.vcf.gz" % utils.splitext_plus(in_file)[0]
    if not utils.file_exists(out_file):
        with file_transaction(first_item, out_file) as tx_out_file:
            if not in_file.endswith(".gz"):
                # Compress next to the transactional output, keeping the original.
                in_file = vcfutils.bgzip_and_index(in_file, remove_orig=False,
                                                   out_dir=os.path.dirname(tx_out_file))
            # cores for BAM reader thread, so max out at 4 based on recommendations
            cmd_args = {"cores": min([dd.get_num_cores(first_item), 4]),
                        "in_file": in_file,
                        "bam_file": bam_file,
                        "ref_file": dd.get_ref_file(first_item),
                        "tx_out_file": tx_out_file}
            cmd = ("duphold --threads {cores} --vcf {in_file} --bam {bam_file} --fasta {ref_file} "
                   "-o {tx_out_file}")
            do.run(cmd.format(**cmd_args), "Annotate SV depth with duphold")
    vcfutils.bgzip_and_index(out_file)
    return out_file
def run(items):
    """Perform detection of structural variations with lumpy.

    Raises ``ValueError`` unless every sample uses a bwa-family or minimap2
    aligner (or has none configured). Calls are produced with smoove, filtered
    by support per sample and, for tumor/normal pairs, by the normal
    background. Each sample with a result gets a "lumpy" entry in "sv".
    """
    allowed_aligners = ["bwa", "sentieon-bwa", "minimap2", False, None]
    for sample in items:
        if utils.get_in(sample, ("config", "algorithm", "aligner")) not in allowed_aligners:
            raise ValueError(
                "Require bwa or minimap2 alignment input for lumpy structural variation detection")

    paired = vcfutils.get_paired(items)
    lead = paired.tumor_data if paired and paired.tumor_data else items[0]
    work_dir = _sv_workdir(lead)

    # Collected per sample but not consumed later in this function.
    previous_evidence = {}
    full_bams, sr_bams, disc_bams = [], [], []
    for sample in items:
        full_bams.append(dd.get_align_bam(sample))
        sr_bam, disc_bam = sshared.find_existing_split_discordants(sample)
        sr_bams.append(sr_bam)
        disc_bams.append(disc_bam)
        cur_dels, cur_dups = _bedpes_from_cnv_caller(sample, work_dir)
        evidence = previous_evidence[dd.get_sample_name(sample)] = {}
        if cur_dels and utils.file_exists(cur_dels):
            evidence["dels"] = cur_dels
        if cur_dups and utils.file_exists(cur_dups):
            evidence["dups"] = cur_dups

    lumpy_vcf, exclude_file = _run_smoove(full_bams, sr_bams, disc_bams, work_dir, items)

    # Retain paired samples with tumor/normal genotyped in one file
    has_normal = paired and paired.normal_name
    to_filter = [paired.tumor_data, paired.normal_data] if has_normal else list(items)
    gt_vcfs = {}
    for sample in to_filter:
        gt_vcfs[dd.get_sample_name(sample)] = _filter_by_support(lumpy_vcf, sample)
    if has_normal:
        gt_vcfs = _filter_by_background(paired.tumor_name, [paired.normal_name],
                                        gt_vcfs, paired.tumor_data)

    out = []
    for sample in items:
        sample.setdefault("sv", [])
        vcf_file = gt_vcfs.get(dd.get_sample_name(sample))
        if vcf_file:
            effects_vcf = None
            if dd.get_svprioritize(sample):
                effects_vcf, _ = effects.add_to_vcf(vcf_file, sample, "snpeff")
            sample["sv"].append({"variantcaller": "lumpy",
                                 "vrn_file": effects_vcf or vcf_file,
                                 "exclude_file": exclude_file})
        out.append(sample)
    return out
def _cnvkit_segment(cnr_file, cov_interval, data, items, out_file=None, detailed=False):
    """Perform segmentation and copy number calling on normalized inputs.

    Writes a header-only segment file when the input has no values; otherwise
    runs ``cnvkit segment`` with any user options from the "cnvkit_segment"
    resources. Defaults are added only for options the user did not supply.
    ``detailed`` is accepted but currently unused. Returns the segment file.
    """
    if not out_file:
        out_file = "%s.cns" % os.path.splitext(cnr_file)[0]
    if utils.file_uptodate(out_file, cnr_file):
        return out_file

    with file_transaction(data, out_file) as tx_out_file:
        if _cna_has_values(cnr_file):
            is_genome = cov_interval == "genome"
            # Scale cores to avoid memory issues with segmentation
            # https://github.com/etal/cnvkit/issues/346
            cores = max(1, dd.get_cores(data) // 2) if is_genome else dd.get_cores(data)
            cmd = [_get_cmd(), "segment", "-p", str(cores), "-o", tx_out_file, cnr_file]
            small_vrn_files = _compatible_small_variants(data, items)
            if small_vrn_files and _cna_has_values(cnr_file) and cov_interval != "genome":
                first = small_vrn_files[0]
                cmd += ["--vcf", first.name, "--sample-id", first.sample]
                if first.normal:
                    cmd += ["--normal-id", first.normal]
            resources = config_utils.get_resources("cnvkit_segment", data["config"])
            user_options = resources.get("options", [])
            cmd += [str(x) for x in user_options]
            if is_genome and "--threshold" not in user_options:
                cmd += ["--threshold", "0.00001"]
            # For tumors, remove very low normalized regions, avoiding upcaptured noise
            # https://github.com/bcbio/bcbio-nextgen/issues/2171#issuecomment-348333650
            # (an hmm-tumor method for detailed segmentation is not enabled)
            paired = vcfutils.get_paired(items)
            if paired and "--drop-low-coverage" not in user_options:
                cmd += ["--drop-low-coverage"]
            # preferentially use conda installed Rscript
            export_cmd = "%s && export TMPDIR=%s && " % (utils.get_R_exports(),
                                                         os.path.dirname(tx_out_file))
            do.run(export_cmd + " ".join(cmd), "CNVkit segment")
        else:
            # No usable values: emit only the column header.
            with open(tx_out_file, "w") as out_handle:
                out_handle.write(
                    "chromosome\tstart\tend\tgene\tlog2\tprobes\tCN1\tCN2\tbaf\tweight\n")
    return out_file
def _pick_lead_item(items):
    """Choose lead item for a set of samples.

    Returns the tumor sample for tumor/normal pairs, otherwise the first
    sample of the batch.
    """
    paired = vcfutils.get_paired(items)
    return paired.tumor_data if paired else list(items)[0]
def _annotate_somatic(data):
    """Annotate somatic calls if we have cosmic data installed.

    Returns True only for human samples that form a somatic pair and whose
    variation resources name an existing "cosmic" file.
    """
    if not is_human(data):
        return False
    if not vcfutils.get_paired([data]):
        return False
    cosmic = dd.get_variation_resources(data).get("cosmic")
    return bool(cosmic and os.path.exists(cosmic))
def _annotate_somatic(data, retriever=None):
    """Annotate somatic calls if we have cosmic data installed.

    Returns True only for human samples that form a somatic pair and whose
    variation resources name a "cosmic" file that exists locally or remotely.
    ``retriever`` is accepted but not used here.
    """
    if not is_human(data):
        return False
    if not vcfutils.get_paired([data]):
        return False
    cosmic = dd.get_variation_resources(data).get("cosmic")
    return bool(cosmic and objectstore.file_exists_or_remote(cosmic))
def run(items):
    """Perform detection of structural variations with lumpy.

    Calls are produced with smoove, annotated with depth, filtered by support
    per sample and, for tumor/normal pairs, by the normal background. Each
    sample with a result gets a "lumpy" entry in "sv"; only the first sample
    sharing a VCF is flagged for upload.
    """
    paired = vcfutils.get_paired(items)
    lead = paired.tumor_data if paired and paired.tumor_data else items[0]
    work_dir = _sv_workdir(lead)

    # Collected per sample but not consumed later in this function.
    previous_evidence = {}
    full_bams, sr_bams, disc_bams = [], [], []
    for sample in items:
        full_bams.append(dd.get_align_bam(sample))
        sr_bam, disc_bam = sshared.find_existing_split_discordants(sample)
        sr_bams.append(sr_bam)
        disc_bams.append(disc_bam)
        cur_dels, cur_dups = _bedpes_from_cnv_caller(sample, work_dir)
        evidence = previous_evidence[dd.get_sample_name(sample)] = {}
        if cur_dels and utils.file_exists(cur_dels):
            evidence["dels"] = cur_dels
        if cur_dups and utils.file_exists(cur_dups):
            evidence["dups"] = cur_dups

    lumpy_vcf, exclude_file = _run_smoove(full_bams, sr_bams, disc_bams, work_dir, items)
    lumpy_vcf = sshared.annotate_with_depth(lumpy_vcf, items)

    # Retain paired samples with tumor/normal genotyped in one file
    has_normal = paired and paired.normal_name
    to_filter = [paired.tumor_data, paired.normal_data] if has_normal else list(items)
    gt_vcfs = {}
    for sample in to_filter:
        gt_vcfs[dd.get_sample_name(sample)] = _filter_by_support(lumpy_vcf, sample)
    if has_normal:
        gt_vcfs = _filter_by_background(paired.tumor_name, [paired.normal_name],
                                        gt_vcfs, paired.tumor_data)

    out = []
    upload_counts = collections.defaultdict(int)
    for sample in items:
        sample.setdefault("sv", [])
        vcf_file = gt_vcfs.get(dd.get_sample_name(sample))
        if vcf_file:
            effects_vcf, _ = effects.add_to_vcf(vcf_file, sample, "snpeff")
            # only upload a single file per batch
            first_for_file = upload_counts[vcf_file] == 0
            sample["sv"].append({"variantcaller": "lumpy",
                                 "vrn_file": effects_vcf or vcf_file,
                                 "do_upload": first_for_file,
                                 "exclude_file": exclude_file})
            upload_counts[vcf_file] += 1
        out.append(sample)
    return out
def run(items, background=None):
    """Detect copy number variations from batched set of samples using GATK4 CNV calling.

    Only somatic (paired) batches are processed; anything else is returned
    unchanged after a warning.

    TODO: implement germline calling with DetermineGermlineContigPloidy and GermlineCNVCaller
    """
    background = background or []
    all_samples = items + background
    paired = vcfutils.get_paired(all_samples)
    if paired:
        return _run_paired(paired)
    logger.warn("GATK4 CNV calling currently only available for somatic samples: %s" %
                ", ".join([dd.get_sample_name(d) for d in all_samples]))
    return items
def run(items):
    """Prepare PURPLE inputs (AMBER heterozygous sites, COBALT depth) for a tumor/normal batch.

    Batches lacking a tumor or a normal are skipped with a log message. The
    prepared file names are printed and ``items`` is returned unchanged in
    every case.
    """
    paired = vcfutils.get_paired(items)
    if not (paired and paired.normal_name):
        sample_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info("Skipping PURPLE; need tumor/normal somatic calls in batch: %s" % sample_names)
        return items
    tumor = paired.tumor_data
    work_dir = _sv_workdir(tumor)
    from bcbio import heterogeneity
    het_file = _amber_het_file(heterogeneity.get_variants(tumor), work_dir, paired)
    depth_file = _run_cobalt(paired, work_dir)
    print(het_file, depth_file)
    return items
def run(items):
    """Perform detection of structural variations with Manta.

    Runs the workflow once for the batch and records the resulting variant
    file under "sv" for every sample. Returns the list of samples.
    """
    paired = vcfutils.get_paired(items)
    lead = paired.tumor_data if paired else items[0]
    work_dir = _sv_workdir(lead)
    workflow_file = _prep_config(items, paired, work_dir)
    variant_file = _run_workflow(items, paired, workflow_file, work_dir)
    out = []
    for sample in items:
        # Each sample gets its own result dictionary.
        sample.setdefault("sv", []).append({"variantcaller": "manta",
                                            "vrn_file": variant_file})
        out.append(sample)
    return out
def finalize_sv(orig_vcf, data, items):
    """Finalize structural variants, adding effects and splitting if needed.

    For paired/somatic batches the combined calls are attached to the tumor
    sample only; other samples in the pair get None. Unpaired batches get a
    per-sample VCF selected from the combined file. Returns the snpEff
    annotated VCF when available, otherwise the sample VCF.
    """
    sample_name = dd.get_sample_name(data)
    paired = vcfutils.get_paired(items)
    if paired:
        sample_vcf = orig_vcf if paired.tumor_name == sample_name else None
    else:
        target = "%s-%s.vcf.gz" % (utils.splitext_plus(orig_vcf)[0], sample_name)
        sample_vcf = vcfutils.select_sample(orig_vcf, sample_name, target, data["config"])
    if not sample_vcf:
        return sample_vcf
    effects_vcf, _ = effects.add_to_vcf(sample_vcf, data, "snpeff")
    return effects_vcf or sample_vcf
def _compatible_small_variants(data, items):
    """Retrieve small variant (SNP, indel) VCFs compatible with CNVkit.

    Returns a list of ``VarFile(name, sample, normal)`` named tuples, one per
    variant entry from ``heterogeneity.get_variants``. For tumor/normal batches
    the tumor and normal names of the pair are used and germline calls are
    excluded; otherwise the sample's own name is used with no normal.
    """
    from bcbio import heterogeneity
    VarFile = collections.namedtuple("VarFile", ["name", "sample", "normal"])
    paired = vcfutils.get_paired(items)
    # Sample and normal names are the same for every variant file, so
    # resolve them once instead of inside the loop.
    if paired:
        sample, normal = paired.tumor_name, paired.normal_name
    else:
        sample, normal = dd.get_sample_name(data), None
    return [VarFile(v["vrn_file"], sample, normal)
            for v in heterogeneity.get_variants(data, include_germline=not paired)]
def run(items):
    """Perform detection of structural variations with Manta.

    Runs the workflow once for the batch, selects the sample file from the
    result and records it under "sv" for every sample.
    """
    paired = vcfutils.get_paired(items)
    lead = paired.tumor_data if paired else items[0]
    work_dir = _sv_workdir(lead)
    workflow_file = _prep_config(items, paired, work_dir)
    variant_file = _run_workflow(items, paired, workflow_file, work_dir)
    sample_file = _select_sample(items, paired, variant_file, work_dir)
    out = []
    for sample in items:
        # Each sample gets its own result dictionary.
        sample.setdefault("sv", []).append({"variantcaller": "manta",
                                            "vrn_file": sample_file})
        out.append(sample)
    return out
def get_qc_tools(data):
    """Retrieve a list of QC tools to use based on configuration and analysis type.

    Uses defaults if previously set.

    Returns the explicitly configured QC list when ``dd.get_algorithm_qc`` is
    truthy. Otherwise builds the list from the lower-cased ``data["analysis"]``
    value and configuration flags, drops every tool named in tools_off and
    returns the result sorted.

    NOTE(review): nesting below was reconstructed from a flattened source;
    confirm which statements belong inside the smallRNA and variant branches.
    """
    # An explicit QC specification overrides all defaults.
    if dd.get_algorithm_qc(data):
        return dd.get_algorithm_qc(data)
    analysis = data["analysis"].lower()
    to_run = []
    if tz.get_in(["config", "algorithm", "kraken"], data):
        to_run.append("kraken")
    if "fastqc" not in dd.get_tools_off(data):
        to_run.append("fastqc")
    # Either qualimap flag in tools_on enables the standard qualimap run.
    if any([tool in dd.get_tools_on(data) for tool in ["qualimap", "qualimap_full"]]):
        to_run.append("qualimap")
    if analysis.startswith("rna-seq") or analysis == "smallrna-seq":
        if "qualimap" not in dd.get_tools_off(data):
            if gtf.is_qualimap_compatible(dd.get_gtf_file(data)):
                to_run.append("qualimap_rnaseq")
            else:
                logger.debug("GTF not compatible with Qualimap, skipping.")
    if analysis.startswith("chip-seq"):
        to_run.append("chipqc")
    if analysis.startswith("smallrna-seq"):
        to_run.append("small-rna")
        to_run.append("atropos")
    if "coverage_qc" not in dd.get_tools_off(data):
        to_run.append("samtools")
    if analysis.startswith(("standard", "variant", "variant2")):
        if "coverage_qc" not in dd.get_tools_off(data):
            to_run += ["coverage", "picard"]
        to_run += ["qsignature", "variants"]
        if vcfanno.is_human(data):
            to_run += ["contamination", "peddy"]
        # The single sample is wrapped in a list for the paired/damage checks.
        if vcfutils.get_paired([data]):
            to_run += ["viral"]
        if damage.should_filter([data]):
            to_run += ["damage"]
    if dd.get_umi_consensus(data):
        to_run += ["umi"]
    if tz.get_in(["config", "algorithm", "preseq"], data):
        to_run.append("preseq")
    # tools_off has the final say over everything collected above.
    to_run = [tool for tool in to_run if tool not in dd.get_tools_off(data)]
    to_run.sort()
    return to_run
def _add_vcf_header_sample_cl(in_file, items, base_file):
    """Add phenotype information to a VCF header.

    Encodes tumor/normal relationships as ##SAMPLE and ##PEDIGREE header lines.
    Returns a bcftools reheader command line when the batch is paired and the
    input has variants; otherwise returns None.
    """
    paired = vcfutils.get_paired(items)
    if not paired:
        return None
    header_lines = ["##SAMPLE=<ID=%s,Genomes=Tumor>" % paired.tumor_name]
    if paired.normal_name:
        header_lines += [
            "##SAMPLE=<ID=%s,Genomes=Germline>" % paired.normal_name,
            "##PEDIGREE=<Derived=%s,Original=%s>" % (paired.tumor_name, paired.normal_name),
        ]
    new_header = _update_header(in_file, base_file, header_lines, _fix_generic_tn_names(paired))
    if not vcfutils.vcf_has_variants(in_file):
        return None
    return "bcftools reheader -h {new_header} | bcftools view ".format(new_header=new_header)
def _compatible_small_variants(data, items):
    """Retrieve small variant (SNP, indel) VCFs compatible with CNVkit.

    Considers entries of ``data["variants"]`` that have a ``vrn_file`` and a
    supported ``variantcaller``. Returns a list of
    ``VarFile(name, sample, normal)`` named tuples: tumor/normal names for
    paired batches, otherwise the sample's own name with no normal.
    """
    VarFile = collections.namedtuple("VarFile", ["name", "sample", "normal"])
    supported = {"vardict", "freebayes", "gatk-haplotype", "mutect2"}
    vrn_files = [v["vrn_file"] for v in data.get("variants", [])
                 if v.get("vrn_file") and v.get("variantcaller") in supported]
    if not vrn_files:
        return []
    # Pairing is a property of the batch, not of each variant file, so it is
    # resolved once and only when there is something to report.
    paired = vcfutils.get_paired(items)
    if paired:
        sample, normal = paired.tumor_name, paired.normal_name
    else:
        sample, normal = dd.get_sample_name(data), None
    return [VarFile(vrn_file, sample, normal) for vrn_file in vrn_files]
def run(items):
    """Perform detection of structural variations with lumpy.

    Collects full, split-read and discordant BAMs for every item, runs smoove
    over the batch, annotates depth, filters per sample by support (and
    against the normal for tumor/normal pairs), then attaches a ``lumpy``
    entry to each item's ``sv`` list. Returns the items in input order.
    """
    paired = vcfutils.get_paired(items)
    work_dir = _sv_workdir(paired.tumor_data if paired and paired.tumor_data else items[0])

    # NOTE(review): previous_evidence is populated but never read in this function.
    previous_evidence = {}
    full_bams, sr_bams, disc_bams = [], [], []
    for data in items:
        full_bams.append(dd.get_align_bam(data))
        sr_bam, disc_bam = sshared.find_existing_split_discordants(data)
        sr_bams.append(sr_bam)
        disc_bams.append(disc_bam)
        cur_dels, cur_dups = _bedpes_from_cnv_caller(data, work_dir)
        evidence = previous_evidence[dd.get_sample_name(data)] = {}
        for key, bedpe in (("dels", cur_dels), ("dups", cur_dups)):
            if bedpe and utils.file_exists(bedpe):
                evidence[key] = bedpe

    lumpy_vcf, exclude_file = _run_smoove(full_bams, sr_bams, disc_bams, work_dir, items)
    lumpy_vcf = sshared.annotate_with_depth(lumpy_vcf, items)

    # Retain paired samples with tumor/normal genotyped in one file
    has_normal = paired and paired.normal_name
    to_genotype = [paired.tumor_data, paired.normal_data] if has_normal else list(items)
    gt_vcfs = {}
    for data in to_genotype:
        gt_vcfs[dd.get_sample_name(data)] = _filter_by_support(lumpy_vcf, data)
    if has_normal:
        gt_vcfs = _filter_by_background(paired.tumor_name, [paired.normal_name],
                                        gt_vcfs, paired.tumor_data)

    already_uploaded = set()
    out = []
    for data in items:
        if "sv" not in data:
            data["sv"] = []
        vcf_file = gt_vcfs.get(dd.get_sample_name(data))
        if vcf_file:
            effects_vcf, _ = effects.add_to_vcf(vcf_file, data, "snpeff")
            data["sv"].append({"variantcaller": "lumpy",
                               "vrn_file": effects_vcf or vcf_file,
                               # only upload a single file per batch
                               "do_upload": vcf_file not in already_uploaded,
                               "exclude_file": exclude_file})
            already_uploaded.add(vcf_file)
        out.append(data)
    return out
def run(items):
    """Run PureCN (including the Dx step) on a tumor batch.

    Batches without a tumor pairing are logged and returned untouched.
    Otherwise the PureCN result is tagged with ``variantcaller: purecn`` and
    appended to the tumor sample's ``sv`` list.

    Returns the normal sample (when present) followed by the tumor sample.
    """
    paired = vcfutils.get_paired(items)
    if not paired:
        logger.info("Skipping PureCN; no somatic tumor calls in batch: %s" %
                    " ".join([dd.get_sample_name(d) for d in items]))
        return items
    work_dir = _sv_workdir(paired.tumor_data)
    purecn_out = _run_purecn(paired, work_dir)
    purecn_out = _run_purecn_dx(purecn_out, paired)
    purecn_out["variantcaller"] = "purecn"
    out = []
    if paired.normal_data:
        out.append(paired.normal_data)
    if "sv" not in paired.tumor_data:
        paired.tumor_data["sv"] = []
    paired.tumor_data["sv"].append(purecn_out)
    # The tumor sample carries the PureCN output, so it must be part of the
    # returned batch; previously it was updated but silently dropped.
    out.append(paired.tumor_data)
    return out
def run(items):
    """Perform detection of structural variations with Manta.

    Prepares the Manta configuration, runs the workflow once for the batch,
    then selects a per-sample file for each item and records it (snpEff
    annotated when available) in the item's ``sv`` list.
    """
    paired = vcfutils.get_paired(items)
    anchor = paired.tumor_data if paired else items[0]
    work_dir = _sv_workdir(anchor)
    workflow_file = _prep_config(items, paired, work_dir)
    variant_file = _run_workflow(items, paired, workflow_file, work_dir)

    for data in items:
        sample_file = _select_sample(data, variant_file, work_dir)
        data.setdefault("sv", [])
        effects_vcf, _ = effects.add_to_vcf(sample_file, data, "snpeff")
        call = {"variantcaller": "manta",
                "vrn_file": effects_vcf or sample_file}
        data["sv"].append(call)
    return list(items)
def finalize_sv(orig_vcf, data, items):
    """Finalize structural variants, adding effects and splitting if needed.

    For a paired (somatic) batch the combined calls are attached only to the
    tumor sample; every other sample gets None. For unpaired batches the
    sample's calls are extracted into ``<base>-<sample>.vcf.gz``.

    Returns the snpEff-annotated VCF when available, otherwise the sample
    VCF, otherwise None.
    """
    sample_name = dd.get_sample_name(data)
    paired = vcfutils.get_paired(items)
    if paired:
        # For paired/somatic, attach combined calls to tumor sample
        if paired.tumor_name != sample_name:
            return None
        sample_vcf = orig_vcf
    else:
        target = "%s-%s.vcf.gz" % (utils.splitext_plus(orig_vcf)[0], sample_name)
        sample_vcf = vcfutils.select_sample(orig_vcf, sample_name, target, data["config"])
    if not sample_vcf:
        return sample_vcf
    effects_vcf, _ = effects.add_to_vcf(sample_vcf, data, "snpeff")
    return effects_vcf or sample_vcf
def _cnvkit_segment(cnr_file, cov_interval, data, items, out_file=None, detailed=False):
    """Perform segmentation and copy number calling on normalized inputs.

    Writes ``out_file`` (default: ``cnr_file`` with a ``.cns`` extension)
    unless it is already up to date. A ``cnr_file`` without values yields a
    header-only output; otherwise ``cnvkit segment`` is run. ``detailed`` is
    accepted but not currently used by the code below.

    Returns the path to the segmentation output.
    """
    if not out_file:
        out_file = "%s.cns" % os.path.splitext(cnr_file)[0]
    if utils.file_uptodate(out_file, cnr_file):
        return out_file

    is_genome = cov_interval == "genome"
    with file_transaction(data, out_file) as tx_out_file:
        if not _cna_has_values(cnr_file):
            with open(tx_out_file, "w") as out_handle:
                out_handle.write("chromosome\tstart\tend\tgene\tlog2\tprobes\tCN1\tCN2\tbaf\tweight\n")
        else:
            # Scale cores to avoid memory issues with segmentation
            # https://github.com/etal/cnvkit/issues/346
            cores = dd.get_cores(data)
            if is_genome:
                cores = max(1, cores // 2)
            cmd = [_get_cmd(), "segment", "-p", str(cores), "-o", tx_out_file, cnr_file]

            small_vrn_files = _compatible_small_variants(data, items)
            if len(small_vrn_files) > 0 and _cna_has_values(cnr_file) and not is_genome:
                first_vrn = small_vrn_files[0]
                cmd.extend(["--vcf", first_vrn.name, "--sample-id", first_vrn.sample])
                if first_vrn.normal:
                    cmd.extend(["--normal-id", first_vrn.normal])

            resources = config_utils.get_resources("cnvkit_segment", data["config"])
            user_options = resources.get("options", [])
            cmd.extend(str(x) for x in user_options)
            if is_genome and "--threshold" not in user_options:
                cmd.extend(["--threshold", "0.00001"])

            # For tumors, drop very low normalized regions to avoid noise
            # https://github.com/bcbio/bcbio-nextgen/issues/2171#issuecomment-348333650
            if vcfutils.get_paired(items) and "--drop-low-coverage" not in user_options:
                cmd.append("--drop-low-coverage")

            # preferentially use conda installed Rscript
            export_cmd = "%s && export TMPDIR=%s && " % (utils.get_R_exports(),
                                                         os.path.dirname(tx_out_file))
            do.run(export_cmd + " ".join(cmd), "CNVkit segment")
    return out_file
def run(items):
    """Run PureCN on a tumor batch and attach results to the tumor sample.

    Batches without a tumor pairing are logged and returned untouched. When
    PureCN produces output it is tagged ``variantcaller: purecn`` and added
    to the tumor's ``sv`` list. Returns the normal (if any) then the tumor.
    """
    paired = vcfutils.get_paired(items)
    if not paired:
        logger.info("Skipping PureCN; no somatic tumor calls in batch: %s" %
                    " ".join([dd.get_sample_name(d) for d in items]))
        return items

    tumor = paired.tumor_data
    purecn_out = _run_purecn(paired, _sv_workdir(tumor))
    # XXX Dx calling is disabled pending more testing (edge case failures)
    # purecn_out = _run_purecn_dx(purecn_out, paired)
    if purecn_out:
        purecn_out["variantcaller"] = "purecn"
        tumor.setdefault("sv", [])
        tumor["sv"].append(purecn_out)

    out = [paired.normal_data] if paired.normal_data else []
    out.append(tumor)
    return out
def run(items):
    """Run PURPLE (with AMBER het sites and COBALT depth) on a tumor/normal pair.

    Requires both a tumor and a named normal; other batches are logged and
    returned untouched. The PURPLE output is appended to the tumor's ``sv``
    list. Returns the normal (if any) followed by the tumor.
    """
    paired = vcfutils.get_paired(items)
    if not (paired and paired.normal_name):
        logger.info("Skipping PURPLE; need tumor/normal somatic calls in batch: %s" %
                    " ".join([dd.get_sample_name(d) for d in items]))
        return items

    tumor = paired.tumor_data
    work_dir = _sv_workdir(tumor)
    from bcbio import heterogeneity
    vrn_files = heterogeneity.get_variants(tumor, include_germline=False)
    het_file = _amber_het_file("pon", vrn_files, work_dir, paired)
    depth_file = _run_cobalt(paired, work_dir)
    purple_out = _run_purple(paired, het_file, depth_file, vrn_files, work_dir)

    out = [paired.normal_data] if paired.normal_data else []
    tumor.setdefault("sv", [])
    tumor["sv"].append(purple_out)
    out.append(tumor)
    return out
def run(items):
    """Perform detection of structural variations with Manta.

    Runs the Manta workflow inside a file transaction on the work directory
    when the expected output is missing, then finalizes the calls per sample
    and records them in each item's ``sv`` list.
    """
    paired = vcfutils.get_paired(items)
    anchor = paired.tumor_data if paired else items[0]
    work_dir = _sv_workdir(anchor)
    variant_file = _get_out_file(work_dir, paired)

    if not utils.file_exists(variant_file):
        with file_transaction(anchor, work_dir) as tx_work_dir:
            utils.safe_makedir(tx_work_dir)
            tx_workflow_file = _prep_config(items, paired, tx_work_dir)
            _run_workflow(items, paired, tx_workflow_file, tx_work_dir)
    assert utils.file_exists(variant_file), "Manta finished without output file %s" % variant_file

    for sample in items:
        sample.setdefault("sv", [])
        final_vcf = shared.finalize_sv(variant_file, sample, items)
        sample["sv"].append({"variantcaller": "manta", "vrn_file": final_vcf})
    return list(items)
def run(items):
    """Perform detection of structural variations with Manta.

    Runs the workflow when the output is missing, annotates depth, optionally
    applies break-point-inspector for the tumor of a pair with a normal BAM,
    and records a ``manta`` entry (with evidence BAM when found) per item.
    """
    paired = vcfutils.get_paired(items)
    anchor = paired.tumor_data if paired else items[0]
    work_dir = _sv_workdir(anchor)
    variant_file = _get_out_file(work_dir, paired)

    if not utils.file_exists(variant_file):
        with file_transaction(anchor, work_dir) as tx_work_dir:
            utils.safe_makedir(tx_work_dir)
            tx_workflow_file = _prep_config(items, paired, tx_work_dir)
            _run_workflow(items, paired, tx_workflow_file, tx_work_dir)
    assert utils.file_exists(variant_file), "Manta finished without output file %s" % variant_file
    variant_file = shared.annotate_with_depth(variant_file, items)

    seen_vcfs = set()
    out = []
    for sample in items:
        # A BPI-processed file replaces variant_file for this and all later items.
        if ("break-point-inspector" in dd.get_tools_on(sample)
                and paired and paired.normal_bam
                and paired.tumor_name == dd.get_sample_name(sample)):
            variant_file = _run_break_point_inspector(sample, variant_file, paired, work_dir)
        if "sv" not in sample:
            sample["sv"] = []
        final_vcf = shared.finalize_sv(variant_file, sample, items)
        vc = {
            "variantcaller": "manta",
            # only upload a single file per batch
            "do_upload": final_vcf not in seen_vcfs,
            "vrn_file": final_vcf,
        }
        evidence_bam = _get_evidence_bam(work_dir, sample)
        if evidence_bam:
            vc["read_evidence"] = evidence_bam
        sample["sv"].append(vc)
        seen_vcfs.add(final_vcf)
        out.append(sample)
    return out
def run(items):
    """Run PURPLE on a tumor/normal pair and attach results to the tumor.

    Batches lacking a tumor or a named normal are logged and returned as-is.
    Somatic variants, AMBER het sites and COBALT depth feed PURPLE; its
    output goes on the tumor's ``sv`` list. Returns normal (if any), tumor.
    """
    paired = vcfutils.get_paired(items)
    if not (paired and paired.normal_name):
        sample_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info("Skipping PURPLE; need tumor/normal somatic calls in batch: %s" % sample_names)
        return items

    work_dir = _sv_workdir(paired.tumor_data)
    from bcbio import heterogeneity
    vrn_files = heterogeneity.get_variants(paired.tumor_data, include_germline=False)
    het_file = _amber_het_file("pon", vrn_files, work_dir, paired)
    depth_file = _run_cobalt(paired, work_dir)
    purple_out = _run_purple(paired, het_file, depth_file, vrn_files, work_dir)

    results = []
    if paired.normal_data:
        results.append(paired.normal_data)
    paired.tumor_data.setdefault("sv", [])
    paired.tumor_data["sv"].append(purple_out)
    results.append(paired.tumor_data)
    return results
def population_variant_regions(items):
    """Retrieve the variant region BED file from a population of items.

    A single item returns its own regions; a tumor/normal batch returns the
    tumor's regions; otherwise the BED file with the largest total coverage
    is returned. Returns None when no item in a population defines regions.
    """
    import pybedtools
    if len(items) == 1:
        return dd.get_variant_regions(items[0])

    paired = vcfutils.get_paired(items)
    if paired:
        return dd.get_variant_regions(paired.tumor_data)

    candidates = []
    for data in items:
        vr_bed = dd.get_variant_regions(data)
        if vr_bed:
            candidates.append((pybedtools.BedTool(vr_bed).total_coverage(), vr_bed))
    if not candidates:
        return None
    # Largest (coverage, path) tuple: same pick as a descending sort's first entry.
    return max(candidates)[1]
def run(items, background=None):
    """Perform detection of structural variations with GRIDSS.

    For a tumor/normal batch the tumor is the input and the normal (when it
    has a BAM) is the background. Otherwise inputs and background come from
    ``sshared.find_case_control`` and no explicit ``background`` may be
    passed. The same batch VCF, snpEff annotated when available, is recorded
    in every item's ``sv`` list.
    """
    paired = vcfutils.get_paired(items)
    if not paired:
        assert not background
        inputs, background = sshared.find_case_control(items)
    else:
        inputs = [paired.tumor_data]
        background = [paired.normal_data] if paired.normal_bam else []

    work_dir = _sv_workdir(inputs[0])
    variant_file = _run_gridss(inputs, background, work_dir)

    for sample in items:
        sample.setdefault("sv", [])
        effects_vcf, _ = effects.add_to_vcf(variant_file, sample, "snpeff")
        sample["sv"].append({"variantcaller": "gridss",
                             "vrn_file": effects_vcf or variant_file})
    return list(items)
def should_filter(items):
    """Check if damage filtering applies to this batch.

    True only when the items form a paired (somatic) batch and at least one
    item lists ``damage_filter`` in its ``tools_on`` setting.
    """
    if vcfutils.get_paired(items) is None:
        return False
    for sample in items:
        if "damage_filter" in dd.get_tools_on(sample):
            return True
    return False
def mutect_caller(align_bams, items, ref_file, assoc_files, region=None,
                  out_file=None):
    """Run the MuTect paired analysis algorithm.

    Calls SNVs with MuTect and, depending on the configured indel caller
    (scalpel, pindel or SomaticIndelDetector), calls indels and combines the
    two call sets. When no indel caller applies, the MuTect output is
    symlinked to ``out_file``.

    Returns the path to the final VCF. This includes the case where a
    non-list region has no aligned reads and only an empty VCF is written;
    that path used to return None.
    """
    config = items[0]["config"]
    if out_file is None:
        out_file = "%s-paired-variants.vcf.gz" % os.path.splitext(align_bams[0])[0]
    if file_exists(out_file):
        return out_file

    broad_runner = broad.runner_from_config(config, "mutect")
    out_file_mutect = (out_file.replace(".vcf", "-mutect.vcf")
                       if "vcf" in out_file else out_file + "-mutect.vcf")
    # Shared by every indel caller branch below; derived from the requested
    # out_file before any branch reassigns it.
    out_file_indels = (out_file.replace(".vcf", "-somaticIndels.vcf")
                       if "vcf" in out_file else out_file + "-somaticIndels.vcf")
    broad_runner, params = \
        _mutect_call_prep(align_bams, items, ref_file, assoc_files,
                          region, out_file_mutect)

    if (not isinstance(region, (list, tuple)) and
            not all(has_aligned_reads(x, region) for x in align_bams)):
        paired = vcfutils.get_paired(items)
        vcfutils.write_empty_vcf(out_file,
                                 samples=[x for x in (paired.tumor_name, paired.normal_name) if x])
        # Hand back the empty VCF so callers always receive a path.
        return out_file

    out_file_orig = "%s-orig%s" % utils.splitext_plus(out_file_mutect)
    if not file_exists(out_file_orig):
        with file_transaction(config, out_file_orig) as tx_out_file:
            # Rationale: MuTect writes another table to stdout, which we don't need
            params += ["--vcf", tx_out_file, "-o", os.devnull]
            broad_runner.run_mutect(params)
    is_paired = "-I:normal" in params
    if not utils.file_uptodate(out_file_mutect, out_file_orig):
        out_file_mutect = _fix_mutect_output(out_file_orig, config, out_file_mutect, is_paired)

    indelcaller = vcfutils.get_indelcaller(config).lower()
    if ("scalpel" in indelcaller and region and isinstance(region, (tuple, list))
            and chromhacks.is_autosomal_or_sex(region[0])):
        # Scalpel InDels
        if scalpel.is_installed(config):
            if not is_paired:
                vcfutils.check_paired_problems(items)
                scalpel._run_scalpel_caller(align_bams, items, ref_file, assoc_files,
                                            region=region, out_file=out_file_indels)
            else:
                scalpel._run_scalpel_paired(align_bams, items, ref_file, assoc_files,
                                            region=region, out_file=out_file_indels)
            out_file = vcfutils.combine_variant_files(orig_files=[out_file_mutect, out_file_indels],
                                                      out_file=out_file,
                                                      ref_file=items[0]["sam_ref"],
                                                      config=config,
                                                      region=region)
        else:
            utils.symlink_plus(out_file_mutect, out_file)
    elif "pindel" in indelcaller:
        from bcbio.structural import pindel
        if pindel.is_installed(config):
            pindel._run_tumor_pindel_caller(align_bams, items, ref_file, assoc_files,
                                            region=region, out_file=out_file_indels)
            # NOTE(review): this branch combines with ref_file while the others
            # use items[0]["sam_ref"]; kept as in the original.
            out_file = vcfutils.combine_variant_files(orig_files=[out_file_mutect, out_file_indels],
                                                      out_file=out_file,
                                                      ref_file=ref_file,
                                                      config=config,
                                                      region=region)
        else:
            utils.symlink_plus(out_file_mutect, out_file)
    elif (("somaticindeldetector" in indelcaller or "sid" in indelcaller)
          and "appistry" in broad_runner.get_mutect_version()):
        # SomaticIndelDetector InDels
        params_indels = _SID_call_prep(align_bams, items, ref_file, assoc_files,
                                       region, out_file_indels)
        with file_transaction(config, out_file_indels) as tx_out_file:
            params_indels += ["-o", tx_out_file]
            broad_runner.run_mutect(params_indels)
        out_file = vcfutils.combine_variant_files(orig_files=[out_file_mutect, out_file_indels],
                                                  out_file=out_file,
                                                  ref_file=items[0]["sam_ref"],
                                                  config=config,
                                                  region=region)
    else:
        utils.symlink_plus(out_file_mutect, out_file)
    return out_file