def reverse_indel(var, genome):
    ''' flip a single-alt indel onto the opposite strand

    The ref and first alt allele are reverse-complemented, the position is
    moved back by the length of the ref allele, and for length-changing
    variants the base found in the genome at the new position is placed in
    front of the remaining sequence.

    Args:
        var: variant with ref, alts, pos (and whatever prefix_chrom reads)
        genome: indexable by chromosome then position, giving objects with
            a .seq attribute

    Returns:
        the same variant object, modified in place
    '''
    flipped_ref = revcomp(var.ref)
    flipped_alt = revcomp(var.alts[0])

    # shift the position before looking anything up in the genome
    var.pos -= len(flipped_ref)
    chrom = prefix_chrom(var)[1]

    ref_len = len(flipped_ref)
    alt_len = len(flipped_alt)

    if ref_len > alt_len:
        # deletion: drop the trailing bases matching the alt length, then
        # lead both alleles with the genome base at the new position
        trimmed = flipped_ref[:-alt_len]
        anchor = genome[chrom][var.pos].seq
        flipped_alt = anchor
        flipped_ref = anchor + trimmed
    elif alt_len > ref_len:
        # insertion: same treatment with the roles of ref and alt swapped
        trimmed = flipped_alt[:-ref_len]
        anchor = genome[chrom][var.pos].seq
        flipped_ref = anchor
        flipped_alt = anchor + trimmed

    var.ref = flipped_ref
    var.alts = [flipped_alt]

    # TODO: reject indels spanning multiple mapping regions. One option is to
    # TODO: remap the start position and add the length to predict an end
    # TODO: position, then compare that against the end position obtained by
    # TODO: remapping the original end position.
    return var
def remap(converter, var, genome):
    ''' converts variant coordinates between genome versions

    Args:
        converter: pyLiftover.LiftOver object, for the from and to genome builds
        var: variant to be converted
        genome: pyfaidx.Fasta object for reference genome being converted to

    Returns:
        the variant (modified in place) with updated chrom and pos, or None
        if the coordinates could not be converted, the new chromosome is not
        in CHROMS, or the strand reversal returned None.
    '''
    prefixed, chrom = prefix_chrom(var)
    try:
        chrom, pos, strand = get_new_coords(converter, chrom, var.pos)
    except ValueError:
        return None

    # If the input chromosome had no 'chr' prefix, drop it from the converted
    # name as well. Only the leading prefix is removed: str.strip('chr') would
    # instead strip any of the characters 'c', 'h', 'r' from BOTH ends of the
    # name, corrupting contig names that happen to end in one of them.
    if not prefixed and chrom.startswith('chr'):
        chrom = chrom[len('chr'):]

    if chrom not in CHROMS:
        return None

    # set updated coords, and convert back from 0-indexed position
    var.chrom = chrom
    var.pos = pos

    if strand == '-':
        var = reverse_var(var, genome)

    # reverse_var may have rejected the variant, so guard the final shift
    if var is not None:
        var.pos += 1

    return var
def test_prefix_chrom_existing(self):
    # a chromosome name that already starts with 'chr' should be flagged as
    # prefixed and handed back unchanged
    variant = self.Var()
    variant.chrom = 'chr1'

    was_prefixed, name = prefix_chrom(variant)

    self.assertTrue(was_prefixed)
    self.assertEqual(name, 'chr1')
def test_prefix_chrom(self):
    # a bare chromosome name should be flagged as not prefixed, and the
    # returned name should have 'chr' added
    variant = self.Var()
    variant.chrom = '1'

    was_prefixed, name = prefix_chrom(variant)

    self.assertFalse(was_prefixed)
    self.assertEqual(name, 'chr1')
def reverse_cnv(var, genome):
    ''' flip a CNV onto the opposite strand

    The position is moved back by the integer value of the SVLEN info field,
    and the ref allele is replaced by the genome base at that new position.

    Args:
        var: variant with pos, ref and an info mapping containing 'SVLEN'
        genome: indexable by chromosome then position, giving objects with
            a .seq attribute

    Returns:
        the same variant object, modified in place
    '''
    chrom = prefix_chrom(var)[1]

    sv_length = int(var.info['SVLEN'])
    var.pos -= sv_length

    new_ref = genome[chrom][var.pos]
    var.ref = new_ref.seq

    # TODO: reject CNVs spanning multiple mapping regions
    return var
def check_reference(var, genome):
    ''' account for variants where the reference sequence has changed.

    The genome sequence covering the variant's ref allele is compared with
    the ref allele itself. If they match, the variant is returned untouched.
    If the genome sequence instead matches one of the alt alleles, that alt
    and the ref are swapped. Otherwise the variant is rejected.

    NOTE: a ref/alt swap also requires GT, PL and AD fields, and various info
    fields, to be adjusted if present; that is not done here.

    Args:
        var: variant with pos, ref and alts
        genome: indexable by chromosome then position slice, giving objects
            with a .seq attribute

    Returns:
        the variant (possibly modified in place), or None if the genome
        sequence matches neither the ref nor any alt allele
    '''
    chrom = prefix_chrom(var)[1]

    start = var.pos
    stop = start + len(var.ref)
    genome_seq = genome[chrom][start:stop].seq

    # nothing to do when the ref allele still agrees with the genome
    if genome_seq == var.ref:
        return var

    if genome_seq in var.alts:
        # the genome now carries one of the alts: exchange it with the ref
        swap_at = var.alts.index(genome_seq)
        var.alts[swap_at] = var.ref
        var.ref = genome_seq
        return var

    return None