def store_low_CAA_genes(genes):
    """Write all genes whose CAA codon usage is below 0.9 to a CSV file.

    For every gene returned by ``DNAAnalyzer.get_gene_codon_usages`` whose
    ``'CAA'`` usage is defined (not ``None``) and smaller than 0.9, one row
    is written to ``results/low_CAA_genes.csv`` containing the gene id, the
    gene name, and -- for LYS (AAA/AAG), GLU (GAA/GAG) and GLN (CAA/CAG) --
    the gene's frequency relative to the value from
    ``DNAAnalyzer.get_codon_freqs`` together with the usage of the A-ending
    codon.

    The summed codon frequencies over all ``genes`` are also printed.

    :param genes: gene records; each must provide ``.seq`` and ``.id``
        (presumably Biopython ``SeqRecord`` objects -- confirm with caller).
    :returns: ``None``; the result is the CSV file written to disk.
    """
    codon_groups = (
        ('LYS', ('AAA', 'AAG')),
        ('GLU', ('GAA', 'GAG')),
        ('GLN', ('CAA', 'CAG')),
    )

    # compute codon usage
    print('Computing codon statistics')
    dnana = DNAAnalyzer(strict=False)
    data = dnana.get_gene_codon_usages(genes)

    avg_codon_freqs = dnana.get_codon_freqs(genes)
    # Reference frequency per amino acid; loop-invariant, so computed once.
    avg_aa_freqs = {
        aa: avg_codon_freqs[codons[0]] + avg_codon_freqs[codons[1]]
        for aa, codons in codon_groups
    }
    print(
        ' LYS freq: %f\n' % avg_aa_freqs['LYS'] +
        ' GLU freq: %f\n' % avg_aa_freqs['GLU'] +
        ' GLN freq: %f' % avg_aa_freqs['GLN'])

    def relative_freq(codon_counts, seq_len, aa, codons):
        """Return the gene's frequency for ``codons`` relative to the average."""
        aa_num = sum(codon_counts[codon] for codon in codons)
        # Per-mille normalisation and its reversal are kept in the original
        # operation order so the written values stay unchanged.
        # NOTE(review): seq_len is len(gene.seq); confirm this is the same
        # unit get_codon_freqs normalises by.
        norm = aa_num * 1000 / seq_len
        return (norm / 1000) / avg_aa_freqs[aa]

    # filter for genes
    low_CAA_genes = []
    for gene, codu in data.items():
        caa_usage = codu['CAA']
        if caa_usage is None or not caa_usage < 0.9:
            continue

        # Count codons once per gene instead of once per amino acid.
        codon_counts = dnana._count_codons(str(gene.seq))
        seq_len = len(gene.seq)
        lys_freq, glu_freq, gln_freq = [
            relative_freq(codon_counts, seq_len, aa, codons)
            for aa, codons in codon_groups
        ]

        low_CAA_genes.append((
            gene.id, extract_gene_name(gene),
            lys_freq, codu['AAA'],
            glu_freq, codu['GAA'],
            gln_freq, caa_usage,
        ))

    # store results
    # newline='' lets the csv module control line endings itself; without it
    # extra blank rows appear on platforms that translate '\n'.
    with open('results/low_CAA_genes.csv', 'w', newline='') as fd:
        wrtr = csv.writer(fd)
        wrtr.writerow([
            'ID', 'name',
            'LYS rel freq', 'CU: AAA',
            'GLU rel freq', 'CU: GAA',
            'GLN rel freq', 'CU: CAA'
        ])
        wrtr.writerows(low_CAA_genes)
class TestCodonUsage(TestCase):
    """Tests for the codon counting, usage and frequency methods of DNAAnalyzer."""

    def setUp(self):
        """Create an analyzer, one raw sequence and four small gene records."""
        self.dnana = DNAAnalyzer()

        # Three codons: AAA, AAG, AAA
        self.seq = 'AAAAAGAAA'
        # Two genes built from AAA/AAG and two built from TTT/TTC.
        self.genes = [
            SeqRecord(Seq('AAAAAAAAG')),
            SeqRecord(Seq('AAAAAGAAA')),
            SeqRecord(Seq('TTTTTCTTT')),
            SeqRecord(Seq('TTCTTTTTC')),
        ]

    def test_codon_counter(self):
        """_count_codons returns absolute counts, 0 for absent codons."""
        count = self.dnana._count_codons(self.seq)

        self.assertEqual(count['AAA'], 2)
        self.assertEqual(count['AAG'], 1)

        self.assertEqual(count['AAT'], 0)
        self.assertEqual(count['AAC'], 0)

    def test_codon_usage(self):
        """get_codon_usage yields 2/3 for AAA, 1/3 for AAG and None for AAT/AAC."""
        codu = self.dnana.get_codon_usage(self.seq)

        self.assertAlmostEqual(codu['AAA'], 2.0 / 3, places=3)
        self.assertAlmostEqual(codu['AAG'], 1.0 / 3, places=3)

        self.assertIsNone(codu['AAT'])
        self.assertIsNone(codu['AAC'])

    def test_average_codon_usage(self):
        """get_avg_codon_usage combines the per-gene usages of all genes."""
        avg_codu = self.dnana.get_avg_codon_usage(self.genes)

        self.assertAlmostEqual(avg_codu['AAA'], 2.0 / 3, places=3)
        self.assertAlmostEqual(avg_codu['AAG'], 1.0 / 3, places=3)

        self.assertIsNone(avg_codu['AAT'])
        self.assertIsNone(avg_codu['AAC'])

        self.assertAlmostEqual(avg_codu['TTT'], 0.5, places=3)
        self.assertAlmostEqual(avg_codu['TTC'], 0.5, places=3)

    def test_codon_frequencies(self):
        """get_codon_freqs matches the expected share of each codon."""
        avg_cod_freqs = self.dnana.get_codon_freqs(self.genes)

        # 12 codons in total: 4x AAA, 2x AAG, 3x TTT, 3x TTC
        self.assertAlmostEqual(avg_cod_freqs['AAA'], 1.0 / 3, places=3)
        self.assertAlmostEqual(avg_cod_freqs['AAG'], 1.0 / 6, places=3)

        self.assertAlmostEqual(avg_cod_freqs['TTT'], 0.25, places=3)
        self.assertAlmostEqual(avg_cod_freqs['TTC'], 0.25, places=3)