def _filter_by_indels(options, chunk):
    """Flag indels that crowd each other and SNPs that sit too near an indel.

    Two independent distance thresholds are read from ``options``:

    * ``min_distance_between_indels`` -- indels closer together than this
      compete with each other; only the one picked by
      ``_select_best_indel`` (the highest QUAL score, with an arbitrary
      pick among ties) is left unmarked. The others are marked with the
      filter ``W:<distance>``.
    * ``min_distance_to_indels`` -- SNPs closer than this to an indel are
      marked with the filter ``w:<distance>``.

    Both groupings are built from every indel in ``chunk``, before any
    record has been marked.

    Records are flagged through ``_mark_as_filtered``; nothing is returned.
    """
    indel_records = {record for record in chunk if vcfwrap.is_indel(record)}

    indel_spacing = options.min_distance_between_indels
    competing_indels = _group_indels_near_position(indel_records,
                                                   indel_spacing)
    snp_spacing = options.min_distance_to_indels
    positions_near_indels = _group_indels_near_position(indel_records,
                                                        snp_spacing)

    for record in chunk:
        if vcfwrap.is_indel(record):
            # NOTE(review): the indel lookup is keyed on pos + 1 while the
            # SNP lookup below uses pos as-is -- presumably this matches how
            # _group_indels_near_position keys its result; confirm there.
            # An indel with no entry only competes with itself.
            candidates = competing_indels.get(record.pos + 1, [record])
            winner = _select_best_indel(candidates)
            if winner is not record:
                _mark_as_filtered(record, "W:%i" % indel_spacing)
            continue

        # Records whose ALT is "." are never marked here.
        # TODO: How to handle heterozygous SNPs near indels?
        if (record.alt != ".") and (record.pos in positions_near_indels):
            _mark_as_filtered(record, "w:%i" % snp_spacing)
def _filter_by_indels(options, chunk):
    """Flag indels that crowd each other and SNPs that sit too near an indel.

    Two independent distance thresholds are read from ``options``:

    * ``min_distance_between_indels`` -- indels closer together than this
      compete with each other; only the one picked by
      ``_select_best_indel`` (the highest QUAL score, with an arbitrary
      pick among ties) is left unmarked. The others are marked with the
      filter ``W=<distance>``.
    * ``min_distance_to_indels`` -- SNPs closer than this to an indel are
      marked with the filter ``w=<distance>``.

    Both groupings are built from every indel in ``chunk``, before any
    record has been marked.

    Records are flagged through ``_mark_as_filtered``; nothing is returned.
    """
    all_indels = {entry for entry in chunk if vcfwrap.is_indel(entry)}

    between_limit = options.min_distance_between_indels
    indel_neighbours = _group_indels_near_position(all_indels, between_limit)
    to_limit = options.min_distance_to_indels
    snp_neighbours = _group_indels_near_position(all_indels, to_limit)

    for entry in chunk:
        if vcfwrap.is_indel(entry):
            # NOTE(review): the indel lookup is keyed on pos + 1 while the
            # SNP lookup below uses pos as-is -- presumably this matches how
            # _group_indels_near_position keys its result; confirm there.
            # An indel with no entry only competes with itself.
            group = indel_neighbours.get(entry.pos + 1, [entry])
            if _select_best_indel(group) is not entry:
                _mark_as_filtered(entry, "W=%i" % between_limit)
            continue

        # Records whose ALT is "." are never marked here.
        # TODO: How to handle heterozygous SNPs near indels?
        if (entry.alt != ".") and (entry.pos in snp_neighbours):
            _mark_as_filtered(entry, "w=%i" % to_limit)
def build_region(options, genotype, bed):
    """Build the sequence string for one BED region from genotype records.

    A working window is created that starts ``options.padding`` bases
    upstream of ``bed.start`` (clamped at 0) and runs to ``bed.end``; every
    position is initialised to "N". Records yielded by ``filter_vcfs`` for
    that window are then applied: non-indel records immediately through
    ``add_snp``, indels afterwards through ``add_indel`` (skipped entirely
    when ``options.ignore_indels`` is set). Finally the upstream padding is
    cut off and the remaining elements are joined.

    Parameters:
        options: object providing ``padding`` and ``ignore_indels``; it is
            also passed on to ``add_indel``.
        genotype: passed unchanged to ``filter_vcfs``.
        bed: region with ``contig``, ``start`` and ``end`` attributes, where
            ``end`` is a past-the-end coordinate.

    Returns:
        The region's sequence as a single string. A zero-length region
        (``bed.end == bed.start``) yields an empty string.
    """
    # Note that bed.end is a past-the-end coordinate
    start = max(0, bed.start - options.padding)

    indels = []
    sequence = ["N"] * (bed.end - start)
    for vcf in filter_vcfs(genotype, bed.contig, start, bed.end):
        if vcfwrap.is_indel(vcf):
            # Indels are deferred until every SNP has been placed.
            indels.append(vcf)
        else:
            add_snp(vcf, vcf.pos - start, sequence)

    if not options.ignore_indels:
        for vcf in indels:
            add_indel(options, bed, vcf, sequence)

    # Drop the upstream padding so only [bed.start, bed.end) remains.
    offset = bed.start - start
    length = bed.end - bed.start
    truncated = sequence[offset:offset + length]

    # Discard insertions after the last position: keep only the first
    # character of the final element. An empty region has no final element,
    # so the trim is skipped rather than raising IndexError.
    if truncated:
        truncated[-1] = truncated[-1][:1]

    return "".join(truncated)