def test_standard_code_lookup(self):
    """genetic_code should hold codes keyed by id as string and number"""
    # Expected values for the standard code (id 1), hoisted for readability.
    standard_aa = ("FFLLSSSSYY**CC*WLLLLPPPPHHQQR"
                   "RRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG")
    standard_start_mask = ("---M---------------M--"
                           "-------------M----------------------------")
    standard_start_codons = {"TTG": "M", "CTG": "M", "ATG": "M"}
    # Codon -> "<own residue><other residue>" for the standard/mito pair.
    standard_vs_mito = {"AGA": "R*", "AGG": "R*", "ATA": "IM", "TGA": "*W"}
    mito_vs_standard = {"AGA": "*R", "AGG": "*R", "ATA": "MI", "TGA": "W*"}

    built_directly = GeneticCode(*self.ncbi_standard)
    looked_up_by_int = genetic_code(1)
    looked_up_by_str = genetic_code("1")
    looked_up_by_default = genetic_code()

    # All four ways of obtaining the standard code must agree.
    candidates = (built_directly, looked_up_by_int,
                  looked_up_by_str, looked_up_by_default)
    for code in candidates:
        self.assertEqual(code.code_sequence, standard_aa)
        self.assertEqual(code.start_codon_sequence, standard_start_mask)
        self.assertEqual(code.start_codons, standard_start_codons)
        self.assertEqual(code.id, 1)
        self.assertEqual(code.name, "Standard Nuclear")
        self.assertEqual(code["TTT"], "F")
        self.assertEqual(code.is_start("ATG"), True)
        self.assertEqual(code.is_start("AAA"), False)
        self.assertEqual(code.is_stop("TAA"), True)
        self.assertEqual(code.is_stop("AAA"), False)

    mito = genetic_code(2)
    self.assertEqual(mito.name, "Vertebrate Mitochondrial")
    self.assertEqual(mito.is_start("AUU"), True)
    self.assertEqual(mito.is_stop("UGA"), False)

    self.assertEqual(built_directly.changes(mito), standard_vs_mito)
    self.assertEqual(mito.changes(built_directly), mito_vs_standard)
    self.assertEqual(mito.changes(mito), {})
    # changes() is also exercised with a raw code string.
    self.assertEqual(
        mito.changes("FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTT"
                     "TNNKKSSRRVVVVAAAADDEEGGGG"),
        mito_vs_standard,
    )
def test_standard_code_lookup(self):
    """genetic_code should hold codes keyed by id as string and number"""
    # Expected values for the standard code (id 1).
    expected_code = ('FFLLSSSSYY**CC*WLLLLPPPPHHQQR'
                     'RRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG')
    expected_start_mask = ('---M---------------M--'
                           '-------------M----------------------------')
    expected_start_codons = {'TTG': 'M', 'CTG': 'M', 'ATG': 'M'}
    # Codon -> '<own residue><other residue>' for each comparison direction.
    std_to_mt = {'AGA': 'R*', 'AGG': 'R*', 'ATA': 'IM', 'TGA': '*W'}
    mt_to_std = {'AGA': '*R', 'AGG': '*R', 'ATA': 'MI', 'TGA': 'W*'}

    from_constructor = GeneticCode(*self.ncbi_standard)
    from_int_id = genetic_code(1)
    from_str_id = genetic_code('1')
    from_no_id = genetic_code()

    # Each lookup route must yield the same standard code.
    for std in (from_constructor, from_int_id, from_str_id, from_no_id):
        self.assertEqual(std.code_sequence, expected_code)
        self.assertEqual(std.start_codon_sequence, expected_start_mask)
        self.assertEqual(std.start_codons, expected_start_codons)
        self.assertEqual(std.id, 1)
        self.assertEqual(std.name, 'Standard Nuclear')
        self.assertEqual(std['TTT'], 'F')
        self.assertEqual(std.is_start('ATG'), True)
        self.assertEqual(std.is_start('AAA'), False)
        self.assertEqual(std.is_stop('TAA'), True)
        self.assertEqual(std.is_stop('AAA'), False)

    mt = genetic_code(2)
    self.assertEqual(mt.name, 'Vertebrate Mitochondrial')
    self.assertEqual(mt.is_start('AUU'), True)
    self.assertEqual(mt.is_stop('UGA'), False)

    self.assertEqual(from_constructor.changes(mt), std_to_mt)
    self.assertEqual(mt.changes(from_constructor), mt_to_std)
    self.assertEqual(mt.changes(mt), {})
    # changes() is also called with a plain code string.
    raw_standard = ('FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTT'
                    'TNNKKSSRRVVVVAAAADDEEGGGG')
    self.assertEqual(mt.changes(raw_standard), mt_to_std)
def test_standard_code_lookup(self):
    """genetic_code should hold codes keyed by id as string and number"""
    # Reference data for the code with id 1.
    aa_by_codon_index = ('FFLLSSSSYY**CC*WLLLLPPPPHHQQR'
                         'RRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG')
    start_by_codon_index = ('---M---------------M--'
                            '-------------M----------------------------')
    start_codon_map = {'TTG': 'M', 'CTG': 'M', 'ATG': 'M'}
    # Expected changes() output in each direction.
    diff_seen_from_standard = {
        'AGA': 'R*', 'AGG': 'R*', 'ATA': 'IM', 'TGA': '*W',
    }
    diff_seen_from_mito = {
        'AGA': '*R', 'AGG': '*R', 'ATA': 'MI', 'TGA': 'W*',
    }

    standard = GeneticCode(*self.ncbi_standard)
    via_number = genetic_code(1)
    via_string = genetic_code('1')
    via_default = genetic_code()

    # The constructor and every lookup form must give the same code.
    for current in (standard, via_number, via_string, via_default):
        self.assertEqual(current.code_sequence, aa_by_codon_index)
        self.assertEqual(current.start_codon_sequence, start_by_codon_index)
        self.assertEqual(current.start_codons, start_codon_map)
        self.assertEqual(current.id, 1)
        self.assertEqual(current.name, 'Standard Nuclear')
        self.assertEqual(current['TTT'], 'F')
        self.assertEqual(current.is_start('ATG'), True)
        self.assertEqual(current.is_start('AAA'), False)
        self.assertEqual(current.is_stop('TAA'), True)
        self.assertEqual(current.is_stop('AAA'), False)

    mitochondrial = genetic_code(2)
    self.assertEqual(mitochondrial.name, 'Vertebrate Mitochondrial')
    self.assertEqual(mitochondrial.is_start('AUU'), True)
    self.assertEqual(mitochondrial.is_stop('UGA'), False)

    self.assertEqual(standard.changes(mitochondrial),
                     diff_seen_from_standard)
    self.assertEqual(mitochondrial.changes(standard), diff_seen_from_mito)
    self.assertEqual(mitochondrial.changes(mitochondrial), {})
    # A bare code string is accepted by changes() as well.
    self.assertEqual(
        mitochondrial.changes('FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTT'
                              'TNNKKSSRRVVVVAAAADDEEGGGG'),
        diff_seen_from_mito)
def two_dna_point_mutants_to_aa(wt_sequence):
    """
    Collect amino acid sequences reachable by substituting two positions.

    wt_sequence (string) DNA sequence
    assumes starting from correct reading frame

    Every pair of positions k1 < k2 is visited. The base at k1 is always
    changed to a different letter; the base at k2 is replaced by each of
    A, C, G, U without skipping a match with the wildtype base, so the
    result also covers single substitutions at k1.

    Translations (genetic code id 11) containing '*' anywhere before the
    final residue are discarded.

    Returns a set of amino acid strings; the wildtype translation is always
    a member.
    """
    orig_code = genetic_code(11)
    AA_sequences = {orig_code.translate(wt_sequence).sequence}
    bases = ['A', 'C', 'G', 'U']

    for k1, char1 in enumerate(wt_sequence[:-1]):
        # k2 must be an absolute index into wt_sequence. Enumerating the
        # tail slice would restart it at 0 and the slices below would then
        # build strings of the wrong length instead of substitutions.
        for k2 in range(k1 + 1, len(wt_sequence)):
            for mutant1 in bases:
                if mutant1 == char1:
                    continue
                for mutant2 in bases:
                    # Slicing copes with both ends of the string, so one
                    # expression covers every (k1, k2) pair.
                    this_dna_string = (wt_sequence[:k1] + mutant1 +
                                       wt_sequence[k1 + 1:k2] + mutant2 +
                                       wt_sequence[k2 + 1:])
                    this_sequence = orig_code.translate(
                        this_dna_string).sequence
                    # A '*' before the last residue means premature stop.
                    if '*' in this_sequence[:-1]:
                        continue
                    AA_sequences.add(this_sequence)
    return AA_sequences
def all_dna_point_mutants_to_aa(wt_sequence):
    """
    Collect amino acid sequences reachable by one nucleotide substitution.

    wt_sequence (string) DNA sequence
    assumes starting from correct reading frame

    Each position is replaced in turn by every letter of A, C, G, U that
    differs from the letter already there, and the result is translated
    with genetic code id 11. Translations containing '*' anywhere before
    the final residue are discarded.

    Returns a set of amino acid strings; the wildtype translation is always
    a member.
    """
    orig_code = genetic_code(11)
    AA_sequences = {orig_code.translate(wt_sequence).sequence}

    for k, char in enumerate(wt_sequence):
        for mutant in ['A', 'C', 'G', 'U']:
            if mutant == char:
                continue
            # Slices past either end are simply empty, so first, middle and
            # last positions need no separate cases.
            this_dna_string = wt_sequence[:k] + mutant + wt_sequence[k + 1:]
            this_sequence = orig_code.translate(this_dna_string).sequence
            # A '*' before the last residue means premature stop.
            if '*' not in this_sequence[:-1]:
                AA_sequences.add(this_sequence)
    return AA_sequences
def dna_to_aa(sequence, try_frames=False):
    """
    Translate a DNA nucleotide sequence into one-letter amino acid codes

    Arguments:
    ----------
    sequence : str
        DNA nucleotide sequence

    Optional:
    ---------
    try_frames : Bool
        if False (default), the sequence is translated as given
        if True, the six-frame translation is computed and the frame with
        the fewest '*' characters is returned (first one wins on ties)

    Returns:
    --------
    aa_sequence : str
        sequence of one-letter amino acid codes
    """
    code = genetic_code(11)

    if try_frames:
        frames = code.translate_six_frames(DNASequence(sequence))
        # min() keeps the first frame among those with the lowest count.
        best_frame = min(frames, key=lambda frame: frame.sequence.count('*'))
        return best_frame.sequence

    return code.translate(sequence).sequence
def two_dna_point_mutants_to_aa(wt_sequence):
    """
    Finds all potential sequences which can be achieved by substituting
    two nucleotide positions and translating to amino acid sequence

    Ignores mutations that introduce a stop ('*') before the final residue

    Assumes wt_sequence starts on the correct reading frame

    Visits every pair of positions k1 < k2 and every combination of
    replacement bases, so the run time grows with the square of the
    sequence length and is therefore VERY SLOW

    The base at k1 is always changed; the base at k2 is replaced by each of
    A, C, G, U without skipping a match with the wildtype base, so single
    substitutions at k1 are included in the result

    Arguments:
    ----------
    wt_sequence : str
        DNA nucleotide sequence

    Returns:
    --------
    AA_sequences : set of str
        each str is a unique sequence of one-letter amino acid codes;
        the wildtype translation is always included
    """
    orig_code = genetic_code(11)
    AA_sequences = {orig_code.translate(wt_sequence).sequence}
    bases = ['A', 'C', 'G', 'U']

    for k1, char1 in enumerate(wt_sequence[:-1]):
        # k2 must be an absolute index into wt_sequence. Enumerating the
        # tail slice would restart it at 0 and the slices below would then
        # build strings of the wrong length instead of substitutions.
        for k2 in range(k1 + 1, len(wt_sequence)):
            for mutant1 in bases:
                if mutant1 == char1:
                    continue
                for mutant2 in bases:
                    # Slicing copes with both ends of the string, so one
                    # expression covers every (k1, k2) pair.
                    this_dna_string = (wt_sequence[:k1] + mutant1 +
                                       wt_sequence[k1 + 1:k2] + mutant2 +
                                       wt_sequence[k2 + 1:])
                    this_sequence = orig_code.translate(
                        this_dna_string).sequence
                    if '*' in this_sequence[:-1]:
                        continue
                    AA_sequences.add(this_sequence)
    return AA_sequences
def make_single_mutant(sequence,wt_res,res_num,mut_res,first_res=1):
    """
    Build forward and reverse primers for one amino acid point mutant.

    sequence (string) DNA sequence
    wt_res (char) single letter amino acid code of wildtype residue to be mutated
    res_num (int) residue id number of residue to be mutated
    mut_res (char) single letter amino acid code of mutant residue
    first_res (int) residue id number of first residue in sequence (default = 1)

    DNA sequence needs to start with the first residue of the protein (no promoter, etc)

    take DNA sequence, convert to AA, define AA point mutant, find corresponding codon of wt and mut,
    output forward and reverse primers

    DNA sequence should be only the kinase domain

    A codon for mut_res that differs from the wildtype codon by a single
    base is preferred. If none exists, a message is printed and the codon
    is chosen by make_mutant instead.

    Returns a tuple (forward_primer, reverse_primer). forward_primer is a
    lowercase string; reverse_primer is the `.sequence` of the forward
    primer's `rc()`.

    Raises IOError when wt_res does not match the translated residue at
    position res_num - first_res.
    """
    orig_code = genetic_code(11)
    # Upper-case the input before translation and codon slicing.
    sequence = sequence.upper()
    aa_sequence = orig_code.translate(sequence).sequence
    # Sanity check: the caller's wildtype residue must match the translation.
    if not str(wt_res) == aa_sequence[res_num-first_res]:
        raise IOError("Desired residue not found -- check wildtype residue name and id, and first residue id")
    # start of codon of residue of interest is at (res_num - first_res)*3
    wt_codon = DNASequence(sequence[(res_num - first_res)*3:(res_num - first_res)*3+3])
    # All codons that orig_code.synonyms lists for the mutant residue.
    mut_codons = orig_code.synonyms[mut_res]
    mut_codon = None
    for codon in mut_codons:
        # NOTE(review): presumably distance() is the fraction of mismatched
        # positions, so *3 == 1 means exactly one base differs -- confirm.
        # There is no break, so the last matching codon wins.
        if wt_codon.distance(DNASequence(codon))*3 == 1:
            mut_codon = codon
    if not mut_codon:
        print("Cannot make desired mutant with a single base change")
        # Fallback choice is delegated to make_mutant (defined elsewhere).
        mut_codon = make_mutant(wt_codon, mut_codons)
    good_melting_temp = False
    # Initial primer window: up to 11 bases on either side of the codon,
    # clipped to the bounds of the sequence.
    start_ix = max(0,(res_num-first_res)*3-11)
    end_ix = min(len(sequence),(res_num+1-first_res)*3+11)
    while not good_melting_temp:
        # Stop once the window exceeds 45 bases; the primer built on the
        # previous pass is the one returned.
        if end_ix - start_ix > 45:
            print("Acceptable melting temp was not found")
            break
        # Flank upstream of the codon + mutant codon + flank downstream.
        forward_primer = sequence[start_ix:(res_num - first_res)*3]+mut_codon+sequence[(res_num+1 - first_res)*3:end_ix]
        forward_primer = forward_primer.lower()
        # check_melting_temp returns the accept flag plus the window bounds
        # for the next pass (presumably a wider window -- confirm in helper).
        good_melting_temp, start_ix, end_ix = check_melting_temp(forward_primer, start_ix, end_ix, len(sequence))
    forward_sequence = DNASequence(forward_primer)
    reverse_sequence = forward_sequence.rc()
    reverse_primer = reverse_sequence.sequence
    return forward_primer, reverse_primer
def dna_to_aa(sequence, try_frames=False):
    """
    Translate a DNA sequence to its amino acid string (genetic code id 11).

    sequence (string) DNA sequence
    try_frames (bool) when False (default) the sequence is translated as
        given; when True the six-frame translation is computed and the
        frame with the fewest '*' characters is returned (first on ties)
    """
    code = genetic_code(11)

    if try_frames:
        frames = code.translate_six_frames(DNASequence(sequence))
        # min() keeps the first frame among those with the lowest count.
        best_frame = min(frames, key=lambda frame: frame.sequence.count('*'))
        return best_frame.sequence

    return code.translate(sequence).sequence
def __init__(self, sequence, first_res=1):
    """
    Store the wild type DNA sequence together with its translation

    Arguments:
    ----------
    sequence : str
        original DNA nucleotide sequence, treated as wild type;
        stored upper-cased
    first_res : int
        residue id number of first residue in sequence (default = 1)
    """
    code = genetic_code(11)
    dna = sequence.upper()
    protein = code.translate(dna).sequence

    # Everything is computed first so attributes are only set on success.
    self.sequence = dna
    self.aa_sequence = protein
    self.first_res = first_res
    self.orig_code = code
def all_dna_point_mutants_to_aa(wt_sequence):
    """
    Finds all potential sequences which can be achieved by making a single
    nucleotide substitution and translating to amino acid sequence

    Ignores mutations that introduce a stop ('*') before the final residue

    Assumes wt_sequence starts on the correct reading frame

    Arguments:
    ----------
    wt_sequence : str
        DNA nucleotide sequence

    Returns:
    --------
    AA_sequences : set of str
        each str is a unique sequence of one-letter amino acid codes;
        the wildtype translation is always included
    """
    orig_code = genetic_code(11)
    AA_sequences = {orig_code.translate(wt_sequence).sequence}

    for k, char in enumerate(wt_sequence):
        for mutant in ['A', 'C', 'G', 'U']:
            if mutant == char:
                continue
            # Slices past either end are simply empty, so first, middle and
            # last positions need no separate cases.
            this_dna_string = wt_sequence[:k] + mutant + wt_sequence[k + 1:]
            this_sequence = orig_code.translate(this_dna_string).sequence
            if '*' not in this_sequence[:-1]:
                AA_sequences.add(this_sequence)
    return AA_sequences
def test_genetic_code_with_invalid_id(self):
    """genetic_code(30) must raise ValueError"""
    self.assertRaises(ValueError, genetic_code, 30)
def test_genetic_code_with_too_many_args(self):
    """genetic_code called with two positional arguments must raise TypeError"""
    self.assertRaises(TypeError, genetic_code, 1, 2)