def _check_for_problem_somatic_batches(items, config): """Identify problem batch setups for somatic calling. We do not support multiple tumors in a single batch and VarDict(Java) does not handle pooled calling, only tumor/normal. """ to_check = [] for data in items: data = copy.deepcopy(data) data["config"] = config_utils.update_w_custom(config, data) to_check.append(data) data_by_batches = collections.defaultdict(list) for data in to_check: batches = dd.get_batches(data) if batches: for batch in batches: data_by_batches[batch].append(data) for batch, items in data_by_batches.items(): if vcfutils.get_paired(items): vcfutils.check_paired_problems(items) elif len(items) > 1: vcs = list(set(tz.concat([dd.get_variantcaller(data) or [] for data in items]))) if any(x.lower().startswith("vardict") for x in vcs): raise ValueError("VarDict does not support pooled non-tumor/normal calling, in batch %s: %s" % (batch, [dd.get_sample_name(data) for data in items])) elif any(x.lower() == "mutect" for x in vcs): raise ValueError("Mutect requires a 'phenotype: tumor' sample for calling, in batch %s: %s" % (batch, [dd.get_sample_name(data) for data in items]))
def _check_for_problem_somatic_batches(items, config): """Identify problem batch setups for somatic calling. We do not support multiple tumors in a single batch and VarDict(Java) does not handle pooled calling, only tumor/normal. """ to_check = [] for data in items: data = copy.deepcopy(data) data["config"] = config_utils.update_w_custom(config, data) to_check.append(data) data_by_batches = collections.defaultdict(list) for data in to_check: batches = dd.get_batches(data) if batches: for batch in batches: data_by_batches[batch].append(data) for batch, items in data_by_batches.items(): if vcfutils.get_paired(items): vcfutils.check_paired_problems(items) elif len(items) > 1: vcs = list( set( tz.concat( [dd.get_variantcaller(data) or [] for data in items]))) if any(x.lower().startswith("vardict") for x in vcs): raise ValueError( "VarDict does not support pooled non-tumor/normal calling, in batch %s: %s" % (batch, [dd.get_sample_name(data) for data in items]))
def _check_for_problem_somatic_batches(items, config): """Identify problem batch setups for somatic calling. We do not support multiple tumors in a single batch and VarDict(Java) does not handle pooled calling, only tumor/normal. """ to_check = [] for data in items: data = copy.deepcopy(data) data["config"] = config_utils.update_w_custom(config, data) to_check.append(data) data_by_batches = collections.defaultdict(list) for data in to_check: batches = dd.get_batches(data) if batches: for batch in batches: data_by_batches[batch].append(data) for batch, items in data_by_batches.items(): if vcfutils.get_paired(items): vcfutils.check_paired_problems(items) elif len(items) > 1: vcs = vcfutils.get_somatic_variantcallers(items) if "vardict" in vcs: raise ValueError( "VarDict does not support pooled non-tumor/normal calling, in batch %s: %s" % (batch, [dd.get_sample_name(data) for data in items])) elif "mutect" in vcs or "mutect2" in vcs: raise ValueError( "MuTect and MuTect2 require a 'phenotype: tumor' sample for calling, " "in batch %s: %s" % (batch, [dd.get_sample_name(data) for data in items]))
def run_freebayes(align_bams, items, ref_file, assoc_files, region=None,
                  out_file=None):
    """Run FreeBayes variant calling, either paired tumor/normal or germline calling.

    Dispatch:
      - not a paired analysis: check for paired problems, then run the
        standard caller;
      - paired analysis without a normal BAM: run the standard caller,
        passing the pairing as ``somatic``;
      - paired analysis with a normal BAM: run the paired caller.

    Returns whatever the selected caller returns.
    """
    if not is_paired_analysis(align_bams, items):
        vcfutils.check_paired_problems(items)
        return _run_freebayes_caller(align_bams, items, ref_file,
                                     assoc_files, region, out_file)

    pairing = get_paired_bams(align_bams, items)
    if pairing.normal_bam:
        return _run_freebayes_paired(align_bams, items, ref_file,
                                     assoc_files, region, out_file)
    # Tumor-only: no normal BAM available for the pair.
    return _run_freebayes_caller(align_bams, items, ref_file,
                                 assoc_files, region, out_file,
                                 somatic=pairing)
def run_freebayes(align_bams, items, ref_file, assoc_files, region=None,
                  out_file=None):
    """Run FreeBayes variant calling, either paired tumor/normal or germline calling.

    ``items`` is first replaced by the result of
    ``shared.add_highdepth_genome_exclusion``. Dispatch then follows:
      - not a paired analysis: check for paired problems, then run the
        standard caller;
      - paired analysis without a normal BAM: run the standard caller,
        passing the pairing as ``somatic``;
      - paired analysis with a normal BAM: run the paired caller with the
        BAMs and sample data ordered tumor first, normal second.

    Returns whatever the selected caller returns.
    """
    items = shared.add_highdepth_genome_exclusion(items)

    if not is_paired_analysis(align_bams, items):
        vcfutils.check_paired_problems(items)
        return _run_freebayes_caller(align_bams, items, ref_file,
                                     assoc_files, region, out_file)

    pairing = get_paired_bams(align_bams, items)
    if pairing.normal_bam:
        tumor_normal_bams = [pairing.tumor_bam, pairing.normal_bam]
        tumor_normal_data = [pairing.tumor_data, pairing.normal_data]
        return _run_freebayes_paired(tumor_normal_bams, tumor_normal_data,
                                     ref_file, assoc_files, region, out_file)
    # Tumor-only: no normal BAM available for the pair.
    return _run_freebayes_caller(align_bams, items, ref_file,
                                 assoc_files, region, out_file,
                                 somatic=pairing)
def run_vardict(align_bams, items, ref_file, assoc_files, region=None,
                out_file=None):
    """Run VarDict variant calling.

    Uses the paired caller when ``vcfutils.is_paired_analysis`` reports a
    paired analysis; otherwise checks for paired problems and runs the
    standard caller. Returns whatever the selected caller returns.
    """
    if not vcfutils.is_paired_analysis(align_bams, items):
        vcfutils.check_paired_problems(items)
        return _run_vardict_caller(align_bams, items, ref_file,
                                   assoc_files, region, out_file)
    return _run_vardict_paired(align_bams, items, ref_file,
                               assoc_files, region, out_file)
def run_freebayes(align_bams, items, ref_file, assoc_files, region=None,
                  out_file=None):
    """Run FreeBayes variant calling, either paired tumor/normal or germline calling.

    Uses the paired caller when ``is_paired_analysis`` reports a paired
    analysis; otherwise checks for paired problems and runs the standard
    caller. Returns whatever the selected caller returns.
    """
    if not is_paired_analysis(align_bams, items):
        vcfutils.check_paired_problems(items)
        return _run_freebayes_caller(align_bams, items, ref_file,
                                     assoc_files, region, out_file)
    return _run_freebayes_paired(align_bams, items, ref_file,
                                 assoc_files, region, out_file)
def run_varscan(align_bams, items, ref_file, assoc_files, region=None,
                out_file=None):
    """Run VarScan calling through ``samtools.shared_variantcall``.

    The paired worker is used when ``is_paired_analysis`` reports a paired
    analysis; otherwise paired problems are checked and the standard worker
    is used. Returns the value from ``samtools.shared_variantcall``.
    """
    if is_paired_analysis(align_bams, items):
        worker = _varscan_paired
    else:
        vcfutils.check_paired_problems(items)
        worker = _varscan_work
    return samtools.shared_variantcall(worker, "varscan", align_bams,
                                       ref_file, items, assoc_files,
                                       region, out_file)
def run_varscan(align_bams, items, ref_file, assoc_files, region=None,
                out_file=None):
    """Run VarScan calling through ``samtools.shared_variantcall``.

    The paired worker is used only when ``get_paired_bams`` yields a pairing
    with both a normal and a tumor BAM; otherwise paired problems are checked
    and the standard worker is used. Returns the value from
    ``samtools.shared_variantcall``.
    """
    pairing = get_paired_bams(align_bams, items)
    if pairing and pairing.normal_bam and pairing.tumor_bam:
        worker = _varscan_paired
    else:
        vcfutils.check_paired_problems(items)
        worker = _varscan_work
    return samtools.shared_variantcall(worker, "varscan", align_bams,
                                       ref_file, items, assoc_files,
                                       region, out_file)
def run_freebayes(align_bams, items, ref_file, assoc_files, region=None,
                  out_file=None):
    """Run FreeBayes variant calling, either paired tumor/normal or germline calling.

    Dispatch:
      - not a paired analysis: check for paired problems, then run the
        standard caller;
      - paired analysis without a normal BAM: run the standard caller,
        passing the pairing as ``somatic``;
      - paired analysis with a normal BAM: run the paired caller with the
        BAMs and sample data ordered tumor first, normal second.

    Returns whatever the selected caller returns.
    """
    if not is_paired_analysis(align_bams, items):
        vcfutils.check_paired_problems(items)
        return _run_freebayes_caller(align_bams, items, ref_file,
                                     assoc_files, region, out_file)

    pairing = get_paired_bams(align_bams, items)
    if pairing.normal_bam:
        tumor_normal_bams = [pairing.tumor_bam, pairing.normal_bam]
        tumor_normal_data = [pairing.tumor_data, pairing.normal_data]
        return _run_freebayes_paired(tumor_normal_bams, tumor_normal_data,
                                     ref_file, assoc_files, region, out_file)
    # Tumor-only: no normal BAM available for the pair.
    return _run_freebayes_caller(align_bams, items, ref_file,
                                 assoc_files, region, out_file,
                                 somatic=pairing)
def mutect_caller(align_bams, items, ref_file, assoc_files, region=None,
                  out_file=None):
    """Run the MuTect paired analysis algorithm.

    When ``out_file`` does not yet exist, MuTect is run into an ``-orig``
    file, post-processed by ``_fix_mutect_output`` and then, depending on the
    configured indel caller, combined with Scalpel, Pindel or
    SomaticIndelDetector calls. When no indel caller applies (or the chosen
    tool is not installed) the MuTect output is symlinked to ``out_file``.

    Args:
        align_bams: input BAM files; the first one names the default output.
        items: sample dictionaries; ``items[0]["config"]`` supplies the
            configuration.
        ref_file: reference file handed to the callers.
        assoc_files: associated files handed to the callers.
        region: optional region; when it is not a list/tuple, the BAMs are
            checked for aligned reads in it before calling.
        out_file: optional output path; defaults to
            ``<first bam without extension>-paired-variants.vcf.gz``.

    Returns:
        The path of the final variant file. This is also returned when an
        empty VCF is written because a BAM has no aligned reads in ``region``.
    """
    config = items[0]["config"]
    if out_file is None:
        out_file = "%s-paired-variants.vcf.gz" % os.path.splitext(align_bams[0])[0]
    if not file_exists(out_file):
        base_config = items[0]["config"]
        # NOTE(review): this runner is replaced by the one returned from
        # _mutect_call_prep below; kept in case runner creation validates
        # the configuration -- confirm before removing.
        broad_runner = broad.runner_from_config(base_config, "mutect")
        out_file_mutect = (out_file.replace(".vcf", "-mutect.vcf")
                           if "vcf" in out_file else out_file + "-mutect.vcf")
        broad_runner, params = \
            _mutect_call_prep(align_bams, items, ref_file, assoc_files,
                              region, out_file_mutect)
        if (not isinstance(region, (list, tuple)) and
                not all(has_aligned_reads(x, region) for x in align_bams)):
            vcfutils.write_empty_vcf(out_file)
            # Return the path (not None) so this matches every other exit,
            # including a rerun where out_file already exists.
            return out_file
        out_file_orig = "%s-orig%s" % utils.splitext_plus(out_file_mutect)
        with file_transaction(config, out_file_orig) as tx_out_file:
            # Rationale: MuTect writes another table to stdout, which we don't need
            params += ["--vcf", tx_out_file, "-o", os.devnull]
            broad_runner.run_mutect(params)
        is_paired = "-I:normal" in params
        out_file_mutect = _fix_mutect_output(out_file_orig, config,
                                             out_file_mutect, is_paired)
        indelcaller = vcfutils.get_indelcaller(base_config)
        if "scalpel" in indelcaller.lower():
            # Scalpel InDels
            out_file_indels = (out_file.replace(".vcf", "-somaticIndels.vcf")
                               if "vcf" in out_file else out_file + "-somaticIndels.vcf")
            if scalpel.is_installed(items[0]["config"]):
                with file_transaction(config, out_file_indels) as tx_out_file2:
                    if not is_paired:
                        vcfutils.check_paired_problems(items)
                        scalpel._run_scalpel_caller(align_bams, items, ref_file,
                                                    assoc_files, region=region,
                                                    out_file=tx_out_file2)
                    else:
                        scalpel._run_scalpel_paired(align_bams, items, ref_file,
                                                    assoc_files, region=region,
                                                    out_file=tx_out_file2)
                out_file = vcfutils.combine_variant_files(
                    orig_files=[out_file_mutect, out_file_indels],
                    out_file=out_file,
                    ref_file=items[0]["sam_ref"],
                    config=items[0]["config"],
                    region=region)
            else:
                utils.symlink_plus(out_file_mutect, out_file)
        elif "pindel" in indelcaller.lower():
            out_file_indels = (out_file.replace(".vcf", "-somaticIndels.vcf")
                               if "vcf" in out_file else out_file + "-somaticIndels.vcf")
            if pindel.is_installed(items[0]["config"]):
                pindel._run_tumor_pindel_caller(align_bams, items, ref_file,
                                                assoc_files, region=region,
                                                out_file=out_file_indels)
                out_file = vcfutils.combine_variant_files(
                    orig_files=[out_file_mutect, out_file_indels],
                    out_file=out_file,
                    ref_file=ref_file,
                    config=items[0]["config"],
                    region=region)
            else:
                utils.symlink_plus(out_file_mutect, out_file)
        elif (("somaticindeldetector" in indelcaller.lower() or
               "sid" in indelcaller.lower()) and
              "appistry" in broad_runner.get_mutect_version()):
            # SomaticIndelDetector InDels
            out_file_indels = (out_file.replace(".vcf", "-somaticIndels.vcf")
                               if "vcf" in out_file else out_file + "-somaticIndels.vcf")
            params_indels = _SID_call_prep(align_bams, items, ref_file,
                                           assoc_files, region, out_file_indels)
            with file_transaction(config, out_file_indels) as tx_out_file:
                params_indels += ["-o", tx_out_file]
                broad_runner.run_mutect(params_indels)
            out_file = vcfutils.combine_variant_files(
                orig_files=[out_file_mutect, out_file_indels],
                out_file=out_file,
                ref_file=items[0]["sam_ref"],
                config=items[0]["config"],
                region=region)
        else:
            utils.symlink_plus(out_file_mutect, out_file)
    return out_file
def mutect_caller(align_bams, items, ref_file, assoc_files, region=None,
                  out_file=None):
    """Run the MuTect paired analysis algorithm.

    When ``out_file`` does not yet exist, MuTect is run into an ``-orig``
    file (skipped if that file is already present), post-processed by
    ``_fix_mutect_output`` when the fixed file is not up to date, and then,
    depending on the configured indel caller, combined with Scalpel, Pindel
    or SomaticIndelDetector calls. When no indel caller applies (or the
    chosen tool is not installed) the MuTect output is symlinked to
    ``out_file``.

    Args:
        align_bams: input BAM files; the first one names the default output.
        items: sample dictionaries; ``items[0]["config"]`` supplies the
            configuration.
        ref_file: reference file handed to the callers.
        assoc_files: associated files handed to the callers.
        region: optional region; when it is not a list/tuple, the BAMs are
            checked for aligned reads in it before calling. Scalpel is only
            used when it is a list/tuple whose first element passes
            ``chromhacks.is_autosomal_or_sex``.
        out_file: optional output path; defaults to
            ``<first bam without extension>-paired-variants.vcf.gz``.

    Returns:
        The path of the final variant file. This is also returned when an
        empty VCF is written because a BAM has no aligned reads in ``region``.
    """
    config = items[0]["config"]
    if out_file is None:
        out_file = "%s-paired-variants.vcf.gz" % os.path.splitext(align_bams[0])[0]
    if not file_exists(out_file):
        base_config = items[0]["config"]
        # NOTE(review): this runner is replaced by the one returned from
        # _mutect_call_prep below; kept in case runner creation validates
        # the configuration -- confirm before removing.
        broad_runner = broad.runner_from_config(base_config, "mutect")
        out_file_mutect = (out_file.replace(".vcf", "-mutect.vcf")
                           if "vcf" in out_file else out_file + "-mutect.vcf")
        broad_runner, params = \
            _mutect_call_prep(align_bams, items, ref_file, assoc_files,
                              region, out_file_mutect)
        if (not isinstance(region, (list, tuple)) and
                not all(has_aligned_reads(x, region) for x in align_bams)):
            vcfutils.write_empty_vcf(out_file)
            # Return the path (not None) so this matches every other exit,
            # including a rerun where out_file already exists.
            return out_file
        out_file_orig = "%s-orig%s" % utils.splitext_plus(out_file_mutect)
        if not file_exists(out_file_orig):
            with file_transaction(config, out_file_orig) as tx_out_file:
                # Rationale: MuTect writes another table to stdout, which we don't need
                params += ["--vcf", tx_out_file, "-o", os.devnull]
                broad_runner.run_mutect(params)
        is_paired = "-I:normal" in params
        if not utils.file_uptodate(out_file_mutect, out_file_orig):
            out_file_mutect = _fix_mutect_output(out_file_orig, config,
                                                 out_file_mutect, is_paired)
        indelcaller = vcfutils.get_indelcaller(base_config)
        if ("scalpel" in indelcaller.lower() and region and
                isinstance(region, (tuple, list)) and
                chromhacks.is_autosomal_or_sex(region[0])):
            # Scalpel InDels
            out_file_indels = (out_file.replace(".vcf", "-somaticIndels.vcf")
                               if "vcf" in out_file else out_file + "-somaticIndels.vcf")
            if scalpel.is_installed(items[0]["config"]):
                if not is_paired:
                    vcfutils.check_paired_problems(items)
                    scalpel._run_scalpel_caller(align_bams, items, ref_file,
                                                assoc_files, region=region,
                                                out_file=out_file_indels)
                else:
                    scalpel._run_scalpel_paired(align_bams, items, ref_file,
                                                assoc_files, region=region,
                                                out_file=out_file_indels)
                out_file = vcfutils.combine_variant_files(
                    orig_files=[out_file_mutect, out_file_indels],
                    out_file=out_file,
                    ref_file=items[0]["sam_ref"],
                    config=items[0]["config"],
                    region=region)
            else:
                utils.symlink_plus(out_file_mutect, out_file)
        elif "pindel" in indelcaller.lower():
            from bcbio.structural import pindel
            out_file_indels = (out_file.replace(".vcf", "-somaticIndels.vcf")
                               if "vcf" in out_file else out_file + "-somaticIndels.vcf")
            if pindel.is_installed(items[0]["config"]):
                pindel._run_tumor_pindel_caller(align_bams, items, ref_file,
                                                assoc_files, region=region,
                                                out_file=out_file_indels)
                out_file = vcfutils.combine_variant_files(
                    orig_files=[out_file_mutect, out_file_indels],
                    out_file=out_file,
                    ref_file=ref_file,
                    config=items[0]["config"],
                    region=region)
            else:
                utils.symlink_plus(out_file_mutect, out_file)
        elif (("somaticindeldetector" in indelcaller.lower() or
               "sid" in indelcaller.lower()) and
              "appistry" in broad_runner.get_mutect_version()):
            # SomaticIndelDetector InDels
            out_file_indels = (out_file.replace(".vcf", "-somaticIndels.vcf")
                               if "vcf" in out_file else out_file + "-somaticIndels.vcf")
            params_indels = _SID_call_prep(align_bams, items, ref_file,
                                           assoc_files, region, out_file_indels)
            with file_transaction(config, out_file_indels) as tx_out_file:
                params_indels += ["-o", tx_out_file]
                broad_runner.run_mutect(params_indels)
            out_file = vcfutils.combine_variant_files(
                orig_files=[out_file_mutect, out_file_indels],
                out_file=out_file,
                ref_file=items[0]["sam_ref"],
                config=items[0]["config"],
                region=region)
        else:
            utils.symlink_plus(out_file_mutect, out_file)
    return out_file