Пример #1
0
def _filter_by_indels(options, chunk):
    """Mark records in `chunk` that lie too close to an indel as filtered.

    Two proximity groupings are built from every indel in the chunk (i.e.
    before any indel has been filtered): one using
    options.min_distance_between_indels, which decides which indels are
    dropped, and one using options.min_distance_to_indels, which decides
    which SNPs are dropped.

    Among indels grouped together, only the one returned by
    _select_best_indel is kept (described as the indel with the highest
    QUAL score, with ties broken arbitrarily); the others are marked with
    a "W:<distance>" filter. Non-indel records with an ALT allele whose
    position is present in the SNP grouping are marked "w:<distance>".

    Nothing is returned; records are marked via _mark_as_filtered.
    """
    all_indels = {record for record in chunk if vcfwrap.is_indel(record)}

    min_indel_gap = options.min_distance_between_indels
    indel_groups = _group_indels_near_position(all_indels, min_indel_gap)
    min_snp_gap = options.min_distance_to_indels
    positions_near_indels = _group_indels_near_position(all_indels,
                                                        min_snp_gap)

    for record in chunk:
        if not vcfwrap.is_indel(record):
            # Records without an ALT allele (".") are never filtered here
            if record.alt != "." and record.pos in positions_near_indels:
                # TODO: Decide how to handle heterozygous SNPs near indels
                _mark_as_filtered(record, "w:%i" % min_snp_gap)
            continue

        # Groups are looked up at pos + 1; an indel without an entry is
        # treated as a group containing only itself, and so is retained.
        # NOTE(review): the + 1 presumably skips the anchor base preceding
        # the indel -- confirm against _group_indels_near_position.
        candidates = indel_groups.get(record.pos + 1, [record])
        if _select_best_indel(candidates) is not record:
            _mark_as_filtered(record, "W:%i" % min_indel_gap)
Пример #2
0
def _filter_by_indels(options, chunk):
    """Flag indels and SNPs in `chunk` that are too close to an indel.

    All indels in the chunk are collected up front, and two lookups are
    derived from that unfiltered set: one built with
    options.min_distance_between_indels (used for indel-vs-indel
    filtering) and one built with options.min_distance_to_indels (used
    for SNP-vs-indel filtering).

    For each indel, the group of nearby indels is fetched and every member
    other than the one picked by _select_best_indel is marked with the
    filter "W=<distance>"; that helper is described as picking the highest
    QUAL score, choosing arbitrarily among ties. Any other record that has
    an ALT allele and whose position appears in the SNP lookup is marked
    with "w=<distance>".

    The function works by side effect (via _mark_as_filtered) and returns
    None.
    """
    indel_records = {entry for entry in chunk if vcfwrap.is_indel(entry)}

    indel_spacing = options.min_distance_between_indels
    neighbours_by_pos = _group_indels_near_position(indel_records,
                                                    indel_spacing)
    snp_spacing = options.min_distance_to_indels
    snp_exclusion = _group_indels_near_position(indel_records, snp_spacing)

    for entry in chunk:
        if vcfwrap.is_indel(entry):
            # Lookup key is pos + 1; when no group is registered there the
            # indel competes only against itself and is therefore kept.
            # NOTE(review): the offset presumably accounts for the anchor
            # base before the indel -- verify in the grouping helper.
            rivals = neighbours_by_pos.get(entry.pos + 1, [entry])
            winner = _select_best_indel(rivals)
            if winner is not entry:
                _mark_as_filtered(entry, "W=%i" % indel_spacing)
            continue

        has_alt_allele = entry.alt != "."
        if has_alt_allele and entry.pos in snp_exclusion:
            # TODO: Decide how to handle heterozygous SNPs near indels
            _mark_as_filtered(entry, "w=%i" % snp_spacing)
Пример #3
0
def build_region(options, genotype, bed):
    """Build the sequence for the region described by `bed` as a string.

    A working buffer is created covering `bed` plus up to options.padding
    positions upstream (clamped so it never starts before position 0),
    with every position initialised to "N". Records yielded by filter_vcfs
    for that window are then applied: non-indels immediately via add_snp,
    indels afterwards via add_indel (skipped entirely when
    options.ignore_indels is set). Finally the upstream padding is cut off
    so that only positions bed.start up to (not including) bed.end remain.

    Returns the joined sequence; an empty string for a zero-length region
    (bed.start == bed.end).
    """
    # Note that bed.end is a past-the-end coordinate
    start = max(0, bed.start - options.padding)

    indels = []
    sequence = ["N"] * (bed.end - start)
    for vcf in filter_vcfs(genotype, bed.contig, start, bed.end):
        if vcfwrap.is_indel(vcf):
            # Indels are deferred until all SNPs have been placed
            indels.append(vcf)
        else:
            add_snp(vcf, vcf.pos - start, sequence)

    if not options.ignore_indels:
        for vcf in indels:
            add_indel(options, bed, vcf, sequence)

    # Drop the upstream padding, keeping only the requested region
    offset = bed.start - start
    length = bed.end - bed.start
    truncated = sequence[offset:offset + length]

    # Discard insertions after the last position. A zero-length region
    # leaves nothing to trim; indexing [-1] there would raise IndexError.
    if truncated:
        truncated[-1] = truncated[-1][:1]

    return "".join(truncated)
Пример #4
0
def build_region(options, genotype, bed):
    """Build the sequence for the region described by `bed` as a string.

    A working buffer is created covering `bed` plus up to options.padding
    positions upstream (clamped so it never starts before position 0),
    with every position initialised to "N". Records yielded by filter_vcfs
    for that window are then applied: non-indels immediately via add_snp,
    indels afterwards via add_indel (skipped entirely when
    options.ignore_indels is set). Finally the upstream padding is cut off
    so that only positions bed.start up to (not including) bed.end remain.

    Returns the joined sequence; an empty string for a zero-length region
    (bed.start == bed.end).
    """
    # Note that bed.end is a past-the-end coordinate
    start = max(0, bed.start - options.padding)

    indels = []
    sequence = ["N"] * (bed.end - start)
    for vcf in filter_vcfs(genotype, bed.contig, start, bed.end):
        if vcfwrap.is_indel(vcf):
            # Indels are deferred until all SNPs have been placed
            indels.append(vcf)
        else:
            add_snp(vcf, vcf.pos - start, sequence)

    if not options.ignore_indels:
        for vcf in indels:
            add_indel(options, bed, vcf, sequence)

    # Drop the upstream padding, keeping only the requested region
    offset = bed.start - start
    length = bed.end - bed.start
    truncated = sequence[offset:offset + length]

    # Discard insertions after the last position. A zero-length region
    # leaves nothing to trim; indexing [-1] there would raise IndexError.
    if truncated:
        truncated[-1] = truncated[-1][:1]

    return "".join(truncated)