def normalize(ref_genome, reader, writer, maxIndelLen=50, cleanOnly=False):
    """Filter, trim and left-normalize every record read from *reader*.

    A record is skipped when its FILTER field marks it as failed (anything
    other than ``None``, ``[]``, ``"."`` or ``"PASS"``), when its genotype
    is ``"0/0"``, or when its genotype is ``"."`` and
    ``is_sv(record, maxIndelLen)`` is false.  Every remaining record is
    handed to ``write`` together with its position (``record.POS - 1``),
    upper-cased reference allele and upper-cased alternate alleles.

    Args:
        ref_genome: Passed through unchanged to ``left_normalize``.
        reader: Iterable of records exposing ``FILTER``, ``POS``, ``CHROM``,
            ``REF`` and ``ALT``.
        writer: Passed through unchanged to ``write``.
        maxIndelLen: Passed through to ``is_sv`` when deciding whether a
            record without genotype information is kept.
        cleanOnly: When true, surviving records are written with upper-cased
            alleles only; no trimming and no left-normalization is applied.

    Returns:
        None.  Output is produced through ``write``.
    """
    for record in reader:
        filter_field = record.FILTER
        failed_filter = (
            filter_field is not None
            and filter_field != []
            and filter_field != "."
            and filter_field != "PASS"
        )
        if failed_filter:
            continue  # record filtered

        # Evaluate the genotype once and reuse it for both checks below.
        gt = genotype(record)
        if gt == "0/0":
            continue  # record filtered
        if gt == "." and not is_sv(record, maxIndelLen):
            # filter variants without genotype information unless they are
            # structural variants
            continue

        pos = record.POS - 1
        ref = str(record.REF.upper())
        # Build a real list: a lazy ``map`` object cannot be concatenated
        # with a list and would be exhausted after a single pass.
        alts = [str(alt).upper() for alt in record.ALT]

        redundancy = find_redundancy(alts + [ref])

        if cleanOnly:
            write(record, pos, ref, alts, writer)
            continue

        if redundancy:
            # ``redundancy`` is the number of trailing characters dropped
            # from the reference and from every alternate allele.
            ref = ref[:-redundancy]
            alts = [alt[:-redundancy] for alt in alts]

        pos, ref, alts = left_normalize(ref_genome, record.CHROM, pos, ref, alts)
        write(record, pos, ref, alts, writer)
def keep_variant(record, maxIndelLen=50):
    """Return True when *record* should be kept, False when it is filtered.

    A record is rejected when:
      * its FILTER field is anything other than ``None``, ``[]``, ``"."``
        or ``"PASS"``;
      * its genotype is ``"0/0"``;
      * its genotype is ``"."`` and ``is_sv(record, maxIndelLen)`` is false;
      * ``record.REF`` or the string form of any entry of ``record.ALT``
        contains the character ``"N"``.

    Args:
        record: Object exposing ``FILTER``, ``REF`` and ``ALT``.
        maxIndelLen: Passed through to ``is_sv``.

    NOTE(review): the visible source defines ``keep_variant`` more than
    once; Python binds the name to whichever definition executes last, so
    confirm which copy is meant to be live.
    """
    filter_field = record.FILTER
    if (
        filter_field is not None
        and filter_field != []
        and filter_field != "."
        and filter_field != "PASS"
    ):
        return False  # record filtered

    # Evaluate the genotype once and reuse it for both checks below.
    gt = genotype(record)
    if gt == "0/0":
        return False  # record filtered
    if gt == "." and not is_sv(record, maxIndelLen):
        # filter variants without genotype information unless they are
        # structural variants
        return False

    # NOTE(review): this is a plain substring test, so any ALT whose string
    # form contains the letter "N" (for instance a symbolic allele such as
    # "<INS>") is rejected as well -- confirm that is intended.
    if "N" in record.REF or any("N" in str(alt) for alt in record.ALT):
        return False  # filter out ref/alts with N base

    return True
def keep_variant(record, maxIndelLen=50):
    """Decide whether *record* survives filtering.

    Returns False when any of the following holds, otherwise True:
      * ``record.FILTER`` is not one of ``None``, ``[]``, ``"."``, ``"PASS"``;
      * ``genotype(record)`` is ``"0/0"``;
      * ``genotype(record)`` is ``"."`` and ``is_sv(record, maxIndelLen)``
        is false;
      * ``"N"`` occurs in ``record.REF`` or in the string form of any
        element of ``record.ALT``.

    Args:
        record: Object exposing ``FILTER``, ``REF`` and ``ALT``.
        maxIndelLen: Forwarded to ``is_sv``.

    NOTE(review): ``keep_variant`` is defined several times in the visible
    source; only the definition executed last is bound to the name at
    import time -- confirm which one is intended.
    """
    flt = record.FILTER
    passes_filter = flt is None or flt == [] or flt == "." or flt == "PASS"
    if not passes_filter:
        return False  # record filtered

    # The genotype is looked up once and reused by the next two checks.
    call = genotype(record)
    if call == "0/0":
        return False  # record filtered
    if call == "." and not is_sv(record, maxIndelLen):
        # filter variants without genotype information unless they are
        # structural variants
        return False

    # NOTE(review): substring match -- an ALT whose string form merely
    # contains the letter "N" (e.g. a symbolic allele like "<INS>") is also
    # dropped here; verify this is the desired behaviour.
    if "N" in record.REF:
        return False  # filter out ref/alts with N base
    if any("N" in str(alt) for alt in record.ALT):
        return False  # filter out ref/alts with N base

    return True
def keep_variant(record, maxIndelLen=50):
    """Return True when *record* should be kept, False when it is filtered.

    A record is rejected when:
      * its FILTER field is anything other than ``None``, ``[]``, ``"."``
        or ``"PASS"``;
      * its genotype is ``"0/0"``;
      * its genotype is ``"."`` and ``is_sv(record, maxIndelLen)`` is false.

    Args:
        record: Object exposing ``FILTER``.
        maxIndelLen: Passed through to ``is_sv``.

    NOTE(review): other definitions of ``keep_variant`` in the visible
    source additionally reject alleles containing "N"; this definition does
    not.  Python keeps whichever definition executes last, so confirm which
    behaviour is intended.
    """
    filter_field = record.FILTER
    if (
        filter_field is not None
        and filter_field != []
        and filter_field != "."
        and filter_field != "PASS"
    ):
        return False  # record filtered

    # Evaluate the genotype once and reuse it for both checks below.
    gt = genotype(record)
    if gt == "0/0":
        return False  # record filtered
    if gt == "." and not is_sv(record, maxIndelLen):
        # filter variants without genotype information unless they are
        # structural variants
        return False

    return True