def _generateProbabilitiesFromFrequencies(self):
    """Derive codon probability tables from the stored frequency counts.

    Reads ``self.codon_freq`` and ``self.nucleotide_freq`` (mappings from
    codon / nucleotide to a count) and (re)builds three dictionaries:

    * ``self.codon_prob`` -- each codon's count divided by the total
      codon count.
    * ``self.codon_prob_from_nucleotide`` -- for each codon, the product
      of the probabilities of its nucleotides, where a nucleotide's
      probability is its count divided by the total nucleotide count.
    * ``self.codon_prob_given_aa`` -- each codon's probability divided by
      the summed probability of all codons returned by
      ``translate.getCodons`` for the same amino acid; 0.0 when that
      summed probability is not positive.

    Zero counts are handled without raising: a zero total codon count
    gives every codon probability 0.0, and a codon containing a
    nucleotide whose probability is zero gets a nucleotide-derived
    probability of 0.0.

    Raises:
        KeyError: if a codon contains a nucleotide missing from
            ``self.nucleotide_freq``, or ``translate.getCodons`` returns
            a codon missing from ``self.codon_freq``.
    """
    # Observed codon probabilities.
    total_codons = float(sum(self.codon_freq.values()))
    if total_codons == 0.0:
        # No observations at all: avoid ZeroDivisionError and use the same
        # 0.0 convention as the conditional probabilities below.
        self.codon_prob = dict((codon, 0.0) for codon in self.codon_freq)
    else:
        self.codon_prob = dict(
            (codon, freq / total_codons)
            for codon, freq in self.codon_freq.items()
        )

    # Per-nucleotide probabilities, computed once rather than per codon.
    total_nucleotides = float(sum(self.nucleotide_freq.values()))
    nucleotide_prob = {}
    for nt, freq in self.nucleotide_freq.items():
        if total_nucleotides == 0.0:
            nucleotide_prob[nt] = 0.0
        else:
            nucleotide_prob[nt] = freq / total_nucleotides

    # Codon probabilities implied by nucleotide composition alone.
    self.codon_prob_from_nucleotide = {}
    for codon in self.codon_freq:
        nt_probs = [nucleotide_prob[nt] for nt in codon]
        if 0.0 in nt_probs:
            # math.log(0) raises ValueError; the product is simply zero.
            cprob = 0.0
        else:
            # Product of the nucleotide probabilities, summed in log space.
            cprob = math.exp(sum([math.log(p) for p in nt_probs]))
        self.codon_prob_from_nucleotide[codon] = cprob

    # Conditional probability of a codon, given that the amino acid is
    # specified.
    self.codon_prob_given_aa = {}
    # NOTE(review): translate.AAsAndStop() presumably yields every amino
    # acid plus the stop symbol, and getCodons(aa, rna=False) its DNA
    # codons -- confirm against the translate module.
    for aa in translate.AAsAndStop():
        codons = translate.getCodons(aa, rna=False)
        marginal_prob = sum([self.codon_prob[c] for c in codons])
        for codon in codons:
            if marginal_prob > 0.0:
                self.codon_prob_given_aa[codon] = self.codon_prob[codon] / marginal_prob
            else:
                self.codon_prob_given_aa[codon] = 0.0
def __str__(self):
    """Return a tab-separated text report of the stored frequency tables.

    The report has up to two sections, each followed by a blank line:

    * If ``self.nucleotide_freq`` is not None: a ``nt / nt.freq`` table
      with one row for each of A, T, G, C (counts shown as integers).
    * If ``self.codon_freq`` is not None: one row per codon, grouped by
      amino acid in the order given by ``translate.AAsAndStop()``, with
      the count, the probability, the nucleotide-derived probability,
      the probability conditional on the amino acid, and the synonymous
      score.

    Side effects: when the codon section is emitted and
    ``self.codon_prob`` or ``self.codon_syn_scores`` is None, the missing
    table is generated first and a ``#``-prefixed notice is printed to
    standard output.

    Returns:
        The report as a string; empty if both frequency tables are None.
    """
    # Collect fragments and join once instead of repeated string +=.
    parts = []

    if self.nucleotide_freq is not None:
        parts.append('\nnt\tnt.freq\n')
        for nt in 'ATGC':
            parts.append('{0}\t{1:d}\n'.format(nt, int(self.nucleotide_freq[nt])))
        parts.append('\n')

    if self.codon_freq is not None:
        # Lazily fill in derived tables. The single-argument parenthesized
        # print emits the same text under Python 2 and Python 3.
        if self.codon_prob is None:
            print("# generating probs from freqs")
            self._generateProbabilitiesFromFrequencies()
        if self.codon_syn_scores is None:
            print("# generating scores from probs")
            # NOTE(review): presumably populates self.codon_syn_scores;
            # the helper is defined elsewhere -- confirm.
            self._generateScoresFromProbabilities()

        parts.append('aa\tcodon\tcodon.freq\tcodon.prob\tcodon.prob.from.nt\tcodon.cond.prob.given.aa\tsyn\n')
        for aa in translate.AAsAndStop():
            for codon in translate.getCodons(aa, rna=False):
                parts.append(
                    '{0}\t{1}\t{2:d}\t{3:.5f}\t{4:.5f}\t{5:.5f}\t{6:.5f}\n'.format(
                        aa,
                        codon,
                        int(self.codon_freq[codon]),
                        self.codon_prob[codon],
                        self.codon_prob_from_nucleotide[codon],
                        self.codon_prob_given_aa[codon],
                        self.codon_syn_scores[codon],
                    )
                )
        parts.append('\n')

    return ''.join(parts)