def test_make_variant_panel_stop_codon(self):
    """Check the variant names generated for the katG stop-codon mutation "W90*".

    Exactly three names are expected. Every one must carry the reference
    bases "CCA", the alternates must be "TTA", "CTA" and "TCA" (in any
    order), and the first name must start at 2155842.
    """
    names = list(
        self.gm.get_variant_names("katG", "W90*", protein_coding_var=True)
    )
    assert len(names) == 3

    parsed = [split_var_name(name) for name in names]
    ref_codons = sorted(parts[0] for parts in parsed)
    alt_codons = sorted(parts[-1] for parts in parsed)

    # Only the position of the first generated name is checked.
    _, first_start, _ = parsed[0]
    assert first_start == 2155842
    assert ref_codons == ["CCA", "CCA", "CCA"]
    assert alt_codons == sorted(["TTA", "CTA", "TCA"])
def __init__(self, var_name, reference, gene=None, mut=None):
    """Record a variant name together with its reference and optional gene context.

    Args:
        var_name: Variant name; it is split with ``split_var_name`` and the
            first and last fields are stored as ``self.ref`` and ``self.alt``.
        reference: Stored unchanged on ``self.reference``.
        gene: Stored unchanged on ``self.gene``.
        mut: Optional mutation name. When truthy, the middle field returned
            by ``split_var_name(mut)`` is stored as ``self.start``.
    """
    self.var_name = var_name
    self.gene = gene
    # NOTE: ``self.start`` is only assigned when ``mut`` is supplied.
    if mut:
        _, self.start, _ = split_var_name(mut)
    # The position embedded in ``var_name`` is discarded here.
    self.ref, _, self.alt = split_var_name(var_name)
    self.standard_table = CodonTable.unambiguous_dna_by_name["Standard"]
    self.reference = reference
def __init__(
    self,
    var_name,
    reference,
    gene=None,
    mut=None,
    protein_coding_var=False,
):
    """Record a variant name together with its reference and gene-level context.

    Args:
        var_name: Variant name; it is split with ``split_var_name`` and the
            first and last fields are stored as ``self.ref`` and ``self.alt``.
        reference: Stored unchanged on ``self.reference``.
        gene: Stored unchanged on ``self.gene``.
        mut: Optional mutation name, kept verbatim on
            ``self.input_mutation_name``. When truthy, the middle field
            returned by ``split_var_name(mut)`` is stored as ``self.start``.
        protein_coding_var: Stored unchanged on ``self.protein_coding_var``.
    """
    self.var_name = var_name
    self.gene = gene
    # NOTE: ``self.start`` is only assigned when ``mut`` is supplied.
    if mut:
        _, self.start, _ = split_var_name(mut)
    # The position embedded in ``var_name`` is discarded here.
    self.ref, _, self.alt = split_var_name(var_name)
    self.standard_table = CodonTable.unambiguous_dna_by_name["Standard"]
    self.reference = reference
    self.input_mutation_name = mut
    self.protein_coding_var = protein_coding_var
def test_make_variant_panel8(self):
    """Check the probe panel built for the eis DNA-level variant "TG-1T".

    The gene-level name must resolve to exactly one variant, "CA" -> "A" at
    2715332, and the single panel alt must equal the panel ref with the base
    at offset 30 removed.
    """
    ag = AlleleGenerator("src/mykrobe/data/NC_000962.3.fasta")
    variants = list(
        self.gm.get_variant_names("eis", "TG-1T", protein_coding_var=False))
    assert len(variants) == 1
    ref, start, alt = split_var_name(variants[0])
    assert ref == 'CA'
    assert start == 2715332
    assert alt == 'A'
    v = Variant.create(
        variant_sets=self.variant_sets,
        reference=self.reference_id,
        reference_bases=ref,
        start=start,
        alternate_bases=[alt])
    panel = ag.create(v)
    assert len(panel.alts) == 1
    panel_alt = panel.alts[0]
    panel_ref = panel.refs[0]
    # The panel ref/alt seqs go past the end of the gene, so we can't
    # compare against the gene sequence. Need to get the subsequence from
    # the reference sequence instead.
    panel_ref_start = self.reference_seq.find(panel_ref)
    panel_ref_end = panel_ref_start + len(panel_ref)
    assert panel_ref_start < start < panel_ref_end
    seq = str(self.reference_seq[panel_ref_start:panel_ref_end])
    assert seq == panel_ref
    # The alt is expected to be the ref with the base at offset 30 dropped.
    assert panel_alt == seq[:30] + seq[31:]
    DB.drop_database('mykrobe-test')
def test_make_variant_panel6(self):
    """Check the probe panel built for the pncA DNA-level variant "CAG28TAA".

    The gene-level name must resolve to exactly one variant, "CTG" -> "TTA"
    at 2289212, and the single panel alt must equal the panel ref with the
    bases at offsets 30-32 replaced by "TTA".
    """
    generator = AlleleGenerator(f"{DATA_DIR}/NC_000962.3.fasta", kmer=31)
    # Looked up as in the sibling tests; the result is not used further here.
    gene = self.gm.get_gene("pncA")
    names = list(
        self.gm.get_variant_names("pncA", "CAG28TAA", protein_coding_var=False)
    )
    assert len(names) == 1

    ref_bases, position, alt_bases = split_var_name(names[0])
    assert ref_bases == "CTG"
    assert position == 2289212
    assert alt_bases == "TTA"

    variant = Variant.create(
        variant_sets=self.variant_sets,
        reference=self.reference_id,
        reference_bases=ref_bases,
        start=position,
        alternate_bases=[alt_bases],
    )
    panel = generator.create(variant)
    assert len(panel.alts) == 1
    probe_alt = panel.alts[0]
    probe_ref = panel.refs[0]

    # The panel ref/alt seqs go past the end of the gene, so we can't
    # compare against the gene sequence. Need to get the subsequence from
    # the reference sequence instead.
    window_start = self.reference_seq.find(probe_ref)
    window_end = window_start + len(probe_ref)
    assert window_start < position < window_end
    window = str(self.reference_seq[window_start:window_end])
    assert window == probe_ref
    assert probe_alt == window[:30] + "TTA" + window[33:]
    DB.drop_database("mykrobe-test")
def variant(self):
    """Build a ``Variant`` from the fields encoded in ``self.var_name``.

    The name is split into reference bases, position and alternate bases;
    the position is passed as ``start`` (coerced with ``int``), ``end`` is
    fixed at 0 and no variant sets are attached.
    """
    ref_bases, position, alt_bases = split_var_name(self.var_name)
    return Variant.create(
        variant_sets=None,
        start=int(position),
        end=0,
        reference_bases=ref_bases,
        alternate_bases=[alt_bases],
        reference=self.reference,
    )
def get_variant_names(self, gene, mutation, protein_coding_var=True):
    """Translate a gene-level mutation name into variant names.

    Args:
        gene: Gene identifier, resolved through ``self.get_gene``.
        mutation: Mutation name, parsed with ``split_var_name`` into
            reference, position and alternate.
        protein_coding_var: When falsy, the mutation is always handled by
            ``_process_DNA_mutation``. When truthy, positive positions are
            handled by ``_process_coding_mutation`` and negative positions
            by ``_process_DNA_mutation``.

    Returns:
        Whatever the selected ``_process_*_mutation`` helper returns.

    Raises:
        ValueError: If the parsed position is 0, regardless of
            ``protein_coding_var``.
    """
    ref, start, alt = split_var_name(mutation)
    gene = self.get_gene(gene)
    # Reject position 0 up front. Previously this guard was only reached
    # when protein_coding_var was truthy, so a DNA-level mutation at
    # position 0 slipped through to _process_DNA_mutation.
    if start == 0:
        raise ValueError(
            "Variants are defined in 1-based coordinates. You can't have pos 0. ")
    if start < 0 or not protein_coding_var:
        return self._process_DNA_mutation(gene, ref, start, alt)
    return self._process_coding_mutation(gene, ref, start, alt)
def test_make_variant_panel5(self):
    """Check that every probe alt for gyrA "D94X" changes residue 94 away from "D".

    For each generated variant the panel ref is substituted by each panel
    alt inside the gene sequence, and the translated result must not have
    "D" at index 93.
    """
    generator = AlleleGenerator("src/mykrobe/data/NC_000962.3.fasta")
    gene = self.gm.get_gene("gyrA")
    for name in self.gm.get_variant_names("gyrA", "D94X"):
        ref_bases, position, alt_bases = split_var_name(name)
        variant = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference_id,
            reference_bases=ref_bases,
            start=position,
            alternate_bases=[alt_bases],
        )
        panel = generator.create(variant)
        for probe_alt in panel.alts:
            gene_seq = copy.copy(str(gene.seq))
            mutated = gene_seq.replace(panel.refs[0], probe_alt)
            protein = Seq(mutated).translate()
            assert protein[93] != "D"
    DB.drop_database('mykrobe-test')
def test_make_variant_panel4(self):
    """Check that every probe alt for katG "W90R" yields "R" at residue 90.

    The gene sequence is reverse-complemented before the panel ref is
    substituted by each panel alt; the result is reverse-complemented again
    and translated, and index 89 of the protein must be "R".
    """
    ag = AlleleGenerator("src/mykrobe/data/NC_000962.3.fasta")
    gene = self.gm.get_gene("katG")
    # Loop-invariant: the unmutated string every substitution starts from.
    unmutated = str(gene.seq.reverse_complement())
    for var in self.gm.get_variant_names("katG", "W90R"):
        ref, start, alt = split_var_name(var)
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference_id,
            reference_bases=ref,
            start=start,
            alternate_bases=[alt])
        panel = ag.create(v)
        for panel_alt in panel.alts:
            mutated = unmutated.replace(panel.refs[0], panel_alt)
            # Compare against the string the substitution was applied to.
            # The old check against str(gene.seq) used a differently
            # oriented sequence and so did not show that replace() had
            # changed anything.
            assert mutated != unmutated
            assert Seq(mutated).reverse_complement().translate()[89] == "R"
    DB.drop_database('mykrobe-test')
def _create_variant(self, probe_name):
    """Build a ``Variant`` from a probe name, or return ``None``.

    The parameters are read with ``get_params``. The variant name is the
    second '-'-separated field of the text before the first '?'. When the
    parameters include a truthy "mut", the name "<gene>_<mut>" is listed
    ahead of the variant name.

    Returns:
        The created ``Variant``, or ``None`` if an ``AttributeError`` is
        raised while splitting the variant name or creating the variant.
    """
    params = get_params(probe_name)
    names = []
    mut = params.get("mut")
    if mut:
        names.append("_".join([params.get("gene"), mut]))

    head = probe_name.split('?')[0]
    var_name = head.split('-')[1]
    names.append(var_name)

    try:
        # If it's a variant panel we can create a variant
        ref_bases, position, alt_bases = split_var_name(var_name)
        created = Variant.create(
            start=position,
            reference_bases=ref_bases,
            alternate_bases=[alt_bases],
            names=names,
            info=params,
        )
    except AttributeError:
        return None
    return created
def test_make_variant_panel7(self):
    """Check the probe panel for the eis upstream DNA variant "G-10A".

    The gene-level name must resolve to exactly one variant, "C" -> "T" at
    2715342, and the single panel alt must equal the panel ref with the base
    at offset 30 replaced by "T".
    """
    # Test DNA change upstream of a gene on the reverse
    # strand. The variant G-10A is in "gene space", ie
    # 10 bases upstream of eis is the nucleotide G on the
    # reverse strand. That position is 2715342 in the genome,
    # and is C on the forwards strand.
    # Here's a diagram:
    #             | <- This C is at -10 in "gene space", so variant G-10A has ref=G
    #             |    ref coord is 2715342, and variant in "ref space" is C2715342T
    # CACAGAATCCGACTGTGGCATATGCCGC
    #   |
    #   | <- C = last nucleotide of gene, at 2715332
    generator = AlleleGenerator(f"{DATA_DIR}/NC_000962.3.fasta", kmer=31)
    # Looked up as in the sibling tests; the result is not used further here.
    gene = self.gm.get_gene("eis")
    names = list(
        self.gm.get_variant_names("eis", "G-10A", protein_coding_var=False)
    )
    assert len(names) == 1

    ref_base, position, alt_base = split_var_name(names[0])
    assert ref_base == "C"
    assert position == 2715342
    assert alt_base == "T"

    variant = Variant.create(
        variant_sets=self.variant_sets,
        reference=self.reference_id,
        reference_bases=ref_base,
        start=position,
        alternate_bases=[alt_base],
    )
    panel = generator.create(variant)
    assert len(panel.alts) == 1
    probe_alt = panel.alts[0]
    probe_ref = panel.refs[0]

    # The panel ref/alt seqs go past the end of the gene, so we can't
    # compare against the gene sequence. Need to get the subsequence from
    # the reference sequence instead.
    window_start = self.reference_seq.find(probe_ref)
    window_end = window_start + len(probe_ref)
    assert window_start < position < window_end
    window = str(self.reference_seq[window_start:window_end])
    assert window == probe_ref
    assert probe_alt == window[:30] + "T" + window[31:]
    DB.drop_database("mykrobe-test")
def test_make_variant_panel1(self):
    """Check that every probe alt for rpoB "D3A" turns residue 3 from "D" into "A".

    For each generated variant, the tail of the panel ref (from offset 25)
    is substituted in the gene sequence by the tail of each panel alt (from
    offset 24). The sequence must change and index 2 of the translation
    must become "A".
    """
    generator = AlleleGenerator(f"{DATA_DIR}/NC_000962.3.fasta")
    gene = self.gm.get_gene("rpoB")
    for name in self.gm.get_variant_names("rpoB", "D3A"):
        ref_bases, position, alt_bases = split_var_name(name)
        variant = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference_id,
            reference_bases=ref_bases,
            start=position,
            alternate_bases=[alt_bases],
        )
        panel = generator.create(variant)
        for probe_alt in panel.alts:
            original = copy.copy(str(gene.seq))
            # Sanity check on the unmodified gene before substituting.
            assert Seq(original).translate()[2] == "D"
            mutated = original.replace(panel.refs[0][25:], probe_alt[24:])
            assert mutated != str(gene.seq)
            assert Seq(mutated).translate()[2] == "A"
    DB.drop_database('mykrobe-test')
def test_make_variant_panel2(self):
    """Check that every probe alt for katG "E3A" yields "A" at residue 3.

    The gene sequence is reverse-complemented before the first 39 bases of
    the panel ref are substituted by the matching prefix of each panel alt
    (adjusted for any length difference between alt and ref). The result is
    reverse-complemented again and translated, and index 2 of the protein
    must be "A".
    """
    ag = AlleleGenerator(f"{DATA_DIR}/NC_000962.3.fasta", kmer=31)
    gene = self.gm.get_gene("katG")
    # Loop-invariant: the unmutated string every substitution starts from.
    unmutated = str(gene.seq.reverse_complement())
    for var in self.gm.get_variant_names("katG", "E3A"):
        ref, start, alt = split_var_name(var)
        v = Variant.create(
            variant_sets=self.variant_sets,
            reference=self.reference_id,
            reference_bases=ref,
            start=start,
            alternate_bases=[alt],
        )
        panel = ag.create(v)
        for panel_alt in panel.alts:
            panel_ref = panel.refs[0]
            alt_prefix_len = 39 + len(panel_alt) - len(panel_ref)
            mutated = unmutated.replace(
                panel_ref[:39], panel_alt[:alt_prefix_len])
            # Compare against the string the substitution was applied to.
            # The old check against str(gene.seq) used a differently
            # oriented sequence and so did not show that replace() had
            # changed anything.
            assert mutated != unmutated
            assert Seq(mutated).reverse_complement().translate()[2] == "A"
    DB.drop_database("mykrobe-test")
def test_split_name_del(self):
    """``split_var_name`` splits "AA12T" into "AA", 12 and "T"."""
    ref_bases, position, alt_bases = split_var_name("AA12T")
    assert ref_bases == "AA"
    assert position == 12
    assert alt_bases == "T"
def test_split_name3(self):
    """``split_var_name`` splits "C-54T" into "C", -54 and "T"."""
    ref_bases, position, alt_bases = split_var_name("C-54T")
    assert ref_bases == "C"
    assert position == -54
    assert alt_bases == "T"
def test_split_name2(self):
    """``split_var_name`` splits "A12T/A" into "A", 12 and "T/A"."""
    ref_bases, position, alt_bases = split_var_name("A12T/A")
    assert ref_bases == "A"
    assert position == 12
    assert alt_bases == "T/A"