def run(call_file, ref_file, vrn_files, data):
    """Filter a variant call file, choosing the strategy from the sample config.

    Steps, in order:

    1. If ``includes_missingalt(data)`` is true, the call file is first
       replaced by the output of ``gatk_remove_missingalt``.
    2. If ``"gatkcnn"`` is enabled in ``tools_on``, filtering is delegated to
       ``_cnn_filter`` and its result is returned.
    3. If ``config_utils.use_vqsr`` approves, SNPs and indels are split,
       filtered separately through ``_variant_filtration`` and recombined.
    4. Otherwise the SNP cutoff filter and then the indel cutoff filter are
       applied one after the other to the whole file.

    Args:
        call_file: Input variant call file.
        ref_file: Reference file handed to the split/filter/combine helpers.
        vrn_files: Passed through to ``_cnn_filter`` / ``_variant_filtration``.
        data: Sample dictionary; ``data["config"]["algorithm"]`` must exist.

    Returns:
        The value produced by the selected filtering step (a ``.vcf.gz`` path
        on the VQSR route; presumably a VCF path on the others -- confirm
        against the helpers).

    Raises:
        ValueError: if VQSR is selected but the call file is a gVCF.
    """
    # One copy of the algorithm config per entry in data["vrn_files"]
    # (a single copy when that key is absent).
    algs = [data["config"]["algorithm"]] * len(data.get("vrn_files", [1]))

    if includes_missingalt(data):
        logger.info("Removing variants with missing alts from %s." % call_file)
        call_file = gatk_remove_missingalt(call_file, data)

    # Explicitly enabled CNN filtering wins over both other strategies.
    if "gatkcnn" in dd.get_tools_on(data):
        return _cnn_filter(call_file, vrn_files, data)

    if not config_utils.use_vqsr(algs, call_file):
        # Cutoff route: the indel filter consumes the SNP filter's output.
        after_snps = vfilter.gatk_snp_cutoff(call_file, data)
        return vfilter.gatk_indel_cutoff(after_snps, data)

    if vcfutils.is_gvcf_file(call_file):
        raise ValueError("Cannot force gVCF output with joint calling using tools_on: [gvcf] and use VQSR. "
                         "Try using cutoff-based soft filtering with tools_off: [vqsr]")

    snps_in, indels_in = vcfutils.split_snps_indels(call_file, ref_file, data["config"])
    # The cutoff function is handed to _variant_filtration alongside the
    # variant type (presumably as a fallback -- confirm in that helper).
    snps_out = _variant_filtration(snps_in, ref_file, vrn_files, data,
                                   "SNP", vfilter.gatk_snp_cutoff)
    indels_out = _variant_filtration(indels_in, ref_file, vrn_files, data,
                                     "INDEL", vfilter.gatk_indel_cutoff)

    filtered_parts = [snps_out, indels_out]
    # Output name = shared leading characters of both inputs + "combined.vcf.gz".
    merged_name = "%scombined.vcf.gz" % os.path.commonprefix(filtered_parts)
    return vcfutils.combine_variant_files(filtered_parts, merged_name, ref_file, data["config"])
def run(call_file, ref_file, vrn_files, data):
    """Filter a variant call file via VQSR-style or cutoff-based filtering.

    When ``config_utils.use_vqsr`` approves, SNPs and indels are split out of
    the call file, each passed through ``_variant_filtration``, and the two
    results are merged into one ``...combined.vcf.gz`` file.  Otherwise the
    SNP cutoff filter and then the indel cutoff filter are applied in
    sequence to the whole file.

    Args:
        call_file: Input variant call file.
        ref_file: Reference file handed to the split/filter/combine helpers.
        vrn_files: Passed through to ``_variant_filtration``.
        data: Sample dictionary; ``data["config"]["algorithm"]`` must exist.

    Returns:
        The combined file from ``vcfutils.combine_variant_files`` on the VQSR
        route, or the output of ``vfilter.gatk_indel_cutoff`` otherwise.

    Raises:
        ValueError: if VQSR is selected while ``gvcf`` is in ``tools_on`` and
            no joint caller is configured.
    """
    # One copy of the algorithm config per entry in data["vrn_files"]
    # (a single copy when that key is absent).
    algs = [data["config"]["algorithm"]] * len(data.get("vrn_files", [1]))

    if not config_utils.use_vqsr(algs):
        # Cutoff route: the indel filter consumes the SNP filter's output.
        return vfilter.gatk_indel_cutoff(vfilter.gatk_snp_cutoff(call_file, data), data)

    gvcf_requested = "gvcf" in dd.get_tools_on(data)
    # The joint caller is only looked up when gVCF output was requested.
    if gvcf_requested and not dd.get_jointcaller(data):
        raise ValueError(
            "Cannot force gVCF output with joint calling using tools_on: [gvcf] and use VQSR. "
            "Try using cutoff-based soft filtering with tools_off: [vqsr]")

    snps_in, indels_in = vcfutils.split_snps_indels(call_file, ref_file, data["config"])
    snps_out = _variant_filtration(snps_in, ref_file, vrn_files, data,
                                   "SNP", vfilter.gatk_snp_cutoff)
    indels_out = _variant_filtration(indels_in, ref_file, vrn_files, data,
                                     "INDEL", vfilter.gatk_indel_cutoff)

    pieces = [snps_out, indels_out]
    # Output name = shared leading characters of both inputs + "combined.vcf.gz".
    target = "%scombined.vcf.gz" % os.path.commonprefix(pieces)
    return vcfutils.combine_variant_files(pieces, target, ref_file, data["config"])
def run(call_file, ref_file, vrn_files, data):
    """Run filtering on the input call file, handling SNPs and indels separately.

    When ``config_utils.use_vqsr`` approves, SNPs and indels are split out of
    the call file, each passed through ``_variant_filtration``, merged into a
    single ``...combined.vcf.gz`` file and finally passed to
    ``_filter_nonref``.  Otherwise the SNP and indel cutoff filters are
    applied in sequence; the result goes through ``_filter_nonref`` unless
    ``gvcf`` is enabled in ``tools_on``.

    Args:
        call_file: Input variant call file.
        ref_file: Reference file handed to the split/filter/combine helpers.
        vrn_files: Passed through to ``_variant_filtration``.
        data: Sample dictionary; ``data["config"]["algorithm"]`` must exist.

    Returns:
        The output of the last filtering step applied (see above).

    Raises:
        AssertionError: if VQSR is selected while ``gvcf`` is in ``tools_on``.
    """
    # One copy of the algorithm config per entry in data["vrn_files"]
    # (a single copy when that key is absent).
    algs = [data["config"]["algorithm"]] * len(data.get("vrn_files", [1]))
    if config_utils.use_vqsr(algs):
        # Raised explicitly instead of via `assert` so this configuration
        # check is not stripped when Python runs with -O.  The exception type
        # is kept as AssertionError so existing callers see no change.
        if "gvcf" in dd.get_tools_on(data):
            raise AssertionError(
                "Cannot force gVCF output and use VQSR. "
                "Try using cutoff-based soft filtering with tools_off: [vqsr]")
        snp_file, indel_file = vcfutils.split_snps_indels(call_file, ref_file, data["config"])
        snp_filter_file = _variant_filtration(snp_file, ref_file, vrn_files, data, "SNP",
                                              vfilter.gatk_snp_cutoff)
        indel_filter_file = _variant_filtration(indel_file, ref_file, vrn_files, data, "INDEL",
                                                vfilter.gatk_indel_cutoff)
        orig_files = [snp_filter_file, indel_filter_file]
        # Output name = shared leading characters of both inputs + "combined.vcf.gz".
        out_file = "%scombined.vcf.gz" % os.path.commonprefix(orig_files)
        combined_file = vcfutils.combine_variant_files(orig_files, out_file, ref_file, data["config"])
        return _filter_nonref(combined_file, data)

    # Cutoff route: the indel filter consumes the SNP filter's output.
    snp_filter = vfilter.gatk_snp_cutoff(call_file, data)
    indel_filter = vfilter.gatk_indel_cutoff(snp_filter, data)
    # With gVCF output requested, the _filter_nonref step is skipped.
    if "gvcf" not in dd.get_tools_on(data):
        return _filter_nonref(indel_filter, data)
    return indel_filter