예제 #1
0
def normalize(ref_genome, reader, writer, maxIndelLen=50, cleanOnly=False):
    """Filter the records of ``reader`` and emit cleaned-up ones via ``write``.

    A record is skipped when:
      * its ``FILTER`` value is anything other than ``[]``, ``"."``,
        ``"PASS"`` or ``None``;
      * ``genotype(record)`` is ``"0/0"``;
      * ``genotype(record)`` is ``"."`` and ``is_sv(record, maxIndelLen)``
        is false.

    For every surviving record REF and the ALT alleles are upper-cased and
    the position is taken as ``record.POS - 1``.  With ``cleanOnly`` set the
    record is written in that form.  Otherwise the number of trailing
    characters reported by ``find_redundancy`` is cut from every allele and
    the result goes through ``left_normalize`` before being written.

    Args:
        ref_genome: Passed through unchanged to ``left_normalize``.
        reader: Iterable of records exposing ``FILTER``, ``POS``, ``CHROM``,
            ``REF`` and ``ALT`` attributes.
        writer: Passed through unchanged to ``write``.
        maxIndelLen: Threshold forwarded to ``is_sv``.
        cleanOnly: When true, skip allele trimming and left-normalization.

    Returns:
        None.  All output happens through ``write``.
    """
    for record in reader:
        record_filter = record.FILTER
        if (record_filter != [] and record_filter != "."
                and record_filter != "PASS" and record_filter is not None):
            continue  # record filtered

        # Evaluate the genotype once; it is only needed for unfiltered records.
        record_genotype = genotype(record)
        if record_genotype == "0/0":
            continue  # record filtered
        if record_genotype == "." and not is_sv(record, maxIndelLen):
            # filter variants without genotype information unless they are
            # structural variants
            continue

        pos = record.POS - 1
        ref = str(record.REF.upper())
        # Build a real list: on Python 3 ``map`` returns a one-shot iterator
        # that can neither be concatenated with a list nor be traversed again
        # by ``write`` / ``left_normalize``.
        alts = [str(alt).upper() for alt in record.ALT]

        if cleanOnly:
            write(record, pos, ref, alts, writer)
            continue

        # Only needed on the normalizing path, so computed after the
        # cleanOnly shortcut.
        redundancy = find_redundancy(alts + [ref])
        if redundancy:
            # Drop the last ``redundancy`` characters from every allele.
            ref = ref[:-redundancy]
            alts = [alt[:-redundancy] for alt in alts]

        pos, ref, alts = left_normalize(ref_genome, record.CHROM, pos, ref,
                                        alts)
        write(record, pos, ref, alts, writer)
예제 #2
0
def keep_variant(record, maxIndelLen=50):
    """Return True when ``record`` should be kept, False when it is dropped.

    A record is dropped when its FILTER value is something other than ``[]``,
    ``"."``, ``"PASS"`` or ``None``, when its genotype is ``"0/0"``, when its
    genotype is ``"."`` and ``is_sv(record, maxIndelLen)`` is false, or when
    REF or any ALT allele contains an ``"N"``.
    """
    filter_value = record.FILTER
    failed_filter = (filter_value != [] and filter_value != "."
                     and filter_value != "PASS" and filter_value != None)
    if failed_filter or genotype(record) == "0/0":
        # record filtered
        return False

    if genotype(record) == "." and not is_sv(record, maxIndelLen):
        # variants without genotype information are only kept when they are
        # structural variants
        return False

    # filter out ref/alts with N base
    has_n_base = "N" in record.REF or any(
        "N" in str(alt) for alt in record.ALT)
    return not has_n_base
예제 #3
0
def keep_variant(record, maxIndelLen=50):
    """Tell whether ``record`` passes every check (True) or is rejected (False).

    Checks, in order: the FILTER value must be one of ``[]``, ``"."``,
    ``"PASS"`` or ``None``; the genotype must not be ``"0/0"``; a ``"."``
    genotype is accepted only if ``is_sv(record, maxIndelLen)`` holds; and
    neither REF nor any ALT allele may contain ``"N"``.
    """
    status = record.FILTER
    unfiltered = (status == [] or status == "." or status == "PASS"
                  or status == None)
    if not unfiltered:
        return False  # record filtered
    if genotype(record) == "0/0":
        return False  # record filtered

    if genotype(record) == ".":
        if not is_sv(record, maxIndelLen):
            # no genotype information and not a structural variant
            return False

    # reject ref/alts with an N base
    if "N" in record.REF:
        return False
    for allele in record.ALT:
        if "N" in str(allele):
            return False
    return True
예제 #4
0
def keep_variant(record, maxIndelLen=50):
    """Return False for records that must be skipped and True otherwise.

    A record is skipped when its FILTER value is not one of ``[]``, ``"."``,
    ``"PASS"`` or ``None``, when its genotype is ``"0/0"``, or when its
    genotype is ``"."`` while ``is_sv(record, maxIndelLen)`` is false.
    """
    flt = record.FILTER
    if flt != [] and flt != "." and flt != "PASS" and flt != None:
        return False  # record filtered
    if genotype(record) == "0/0":
        return False  # record filtered

    # Variants without genotype information survive only when they are
    # structural variants.
    lacks_genotype = genotype(record) == "."
    return not (lacks_genotype and not is_sv(record, maxIndelLen))
예제 #5
0
파일: normalize_vcf.py 프로젝트: nuin/smash
def keep_variant(record, maxIndelLen=50):
    """Report whether ``record`` is retained (True) or filtered out (False).

    Filtering happens when the FILTER value differs from all of ``[]``,
    ``"."``, ``"PASS"`` and ``None``, when the genotype equals ``"0/0"``, or
    when the genotype equals ``"."`` and ``is_sv(record, maxIndelLen)`` is
    false.
    """
    # FILTER values that do not cause the record to be dropped.
    accepted_filters = ([], ".", "PASS", None)
    if (all(record.FILTER != accepted for accepted in accepted_filters)
            or genotype(record) == "0/0"):
        return False  # record filtered

    if genotype(record) != ".":
        return True
    # No genotype information: keep only structural variants.
    if is_sv(record, maxIndelLen):
        return True
    return False