def getX4Codons(pssm,dna):
    """Translate ``dna`` codon by codon, using ``pssm`` to resolve ambiguity.

    Every codon (three letters taken from ``dna.tostring()``) is expanded
    with ``CodonTable.list_possible_proteins`` against the "Standard" table
    and the ambiguous DNA values.  A codon with a single candidate residue
    contributes that residue; otherwise the candidate with the highest
    ``pssm[pos][residue]`` score is used.  Candidates that share a score
    collapse onto the one listed last, because candidates are keyed by
    score.  A trace of every choice is printed.

    Returns a ``(protein_string, summed_score)`` tuple.
    """
    std_nt = CodonTable.unambiguous_dna_by_name["Standard"]
    nonstd = IUPACData.ambiguous_dna_values

    # One list of candidate residues per codon position.
    candidates_per_codon = [
        CodonTable.list_possible_proteins(
            dna.tostring()[start:start + 3], std_nt.forward_table, nonstd
        )
        for start in range(0, len(dna), 3)
    ]

    residues = []
    final_score = 0
    print("xxxxxxxxxxxxxxxxxx getX4Codons xxxxxxxx")
    for pos, candidates in enumerate(candidates_per_codon):
        if len(candidates) == 1:
            residue = candidates[0]
            score = pssm[pos][residue]
            print(" ".join([residue + " = ", str(score)]))
        else:
            # Key by score so the largest key identifies the winner.
            by_score = {}
            for candidate in candidates:
                by_score[pssm[pos][candidate]] = candidate
            score = sorted(by_score)[-1]
            residue = by_score[score]
            print(residue + " = " + str(score))
        residues.append(residue)
        final_score = final_score + score
    print("----------- end ---------")
    return ("".join(residues), final_score)
def count_usage(input_file):
    """Return codon lookup counts gathered over the "gene" features of a file.

    ``input_file`` is a path that is opened and handed to
    ``FeatureParser().parse``.  For every feature whose ``type`` is
    ``"gene"`` the slice ``[start:end]`` of the record sequence (strand 1)
    or of its complement (any other strand) is translated with a codon
    table whose forward table is a ``LookupCountingForwardTable`` wrapping
    the standard DNA forward table.

    Returns ``counting_forward_table.counts``.
    """
    standard_forward_table = CodonTable.standard_dna_table.forward_table
    counting_forward_table = LookupCountingForwardTable(standard_forward_table)
    codon_table = CodonTable.CodonTable(forward_table=counting_forward_table)

    # Close the handle deterministically; the original left it open.
    # NOTE(review): assumes parse() has read everything it needs before
    # returning -- confirm against the parser in use.
    with open(input_file) as handle:
        record = FeatureParser().parse(handle)

    complement = record.seq.complement()
    for feat in record.features:
        if feat.type != "gene":
            continue
        # Conditional expression instead of ``a and b or c``, which silently
        # falls through to ``c`` whenever ``b`` is falsy.
        strand_seq = record.seq if feat.strand == 1 else complement
        # NOTE(review): the non-forward strand uses complement(), not a
        # reverse complement -- confirm this is the intended reading frame.
        start = feat.location.start.position
        end = feat.location.end.position
        # The translated sequence itself is not used; presumably the call is
        # made so the counting table registers each codon lookup.
        strand_seq[start:end].translate(codon_table)

    return counting_forward_table.counts
def makeTableX(table):
    """Return a new ``CodonTable`` derived from ``table``.

    The result reuses ``table``'s nucleotide alphabet, back table, start
    codons and stop codons, takes ``proteinX`` as its protein alphabet and
    wraps ``table.forward_table`` in ``MissingTable``.  ``table`` must use
    ``IUPAC.extended_protein`` as its protein alphabet (checked by assert).
    """
    assert table.protein_alphabet == IUPAC.extended_protein
    nucleotides = table.nucleotide_alphabet
    forward = MissingTable(table.forward_table)
    return CodonTable.CodonTable(
        nucleotides,
        proteinX,
        forward,
        table.back_table,
        table.start_codons,
        table.stop_codons,
    )
def main():
    """Enumerate ambiguous codons and sanity-check them against Biopython.

    Builds a codon -> amino-acid dict from the T/C/A/G ordering, generates
    every triplet that contains at least one ambiguity code, prints the
    plain codon table, and for each ambiguous triplet collects the
    non-stop translations of all its concrete expansions.  Triplets for
    which no expansion is a stop are dropped from that collection.  Each
    triplet is then passed to ``CodonTable.list_possible_proteins``; a
    ``KeyError`` or ``TranslationError`` is reported on stdout.
    """
    bases = ['T', 'C', 'A', 'G']
    ambig_bases = ['R', 'Y', 'S', 'W', 'K', 'M', 'D', 'H', 'B', 'V', 'N']
    codons = [a + b + c for a in bases for b in bases for c in bases]
    amino_acids = "F F L L S S S S Y Y stop stop C C stop W L L L L P P P P H H Q Q R R R R I I I M T T T T N N K K S S R R V V V V A A A A D D E E G G G G".split(' ')
    codon_table = dict(zip(codons, amino_acids))
    nonstd = IUPACData.ambiguous_dna_values  # ambiguity code -> concrete bases
    std_nt = CodonTable.unambiguous_dna_by_name["Standard"]

    # Every combination of positions in which at least one base is ambiguous,
    # listed in the same order in which the triplets are to be processed.
    position_choices = [
        (ambig_bases, ambig_bases, ambig_bases),
        (bases, ambig_bases, ambig_bases),
        (ambig_bases, bases, ambig_bases),
        (ambig_bases, ambig_bases, bases),
        (ambig_bases, bases, bases),
        (bases, ambig_bases, bases),
        (bases, bases, ambig_bases),
    ]
    all_ambig_trip = []
    for first, second, third in position_choices:
        all_ambig_trip.extend(itertools.product(first, second, third))

    for b in sorted(codon_table):
        print(" ".join(["codon_table:", b, " aa:", codon_table[b]]))

    # Ambiguous codon -> non-stop translations; only codons that can also
    # encode a stop are kept.
    stop_set = {}
    for trip in all_ambig_trip:
        stop_seen = False
        ambig = "".join(trip)
        stop_set[ambig] = []
        # Expand the ambiguous triplet into all concrete codons.
        expansions = itertools.product(
            nonstd[trip[0]], nonstd[trip[1]], nonstd[trip[2]]
        )
        for letters in expansions:
            aa = codon_table["".join(letters)]
            if aa == 'stop':
                stop_seen = True
            else:
                stop_set[ambig].append(aa)
        if stop_seen is False:
            del stop_set[ambig]
        # NOTE(review): the check is run for every triplet (its message
        # reports stop_seen either way) -- confirm against the original
        # indentation.
        try:
            CodonTable.list_possible_proteins(ambig, std_nt.forward_table, nonstd)
        except KeyError as err:
            print(" ".join([
                "Sanity check failed for: ", ambig,
                " with Key error. stop_seen is ", str(stop_seen), " ", str(err),
            ]))
        except CodonTable.TranslationError as TransError:
            print(" ".join([
                "Sanity check failed for: ", ambig,
                " with Translation error. stop_seen is ", str(stop_seen), " ",
                str(TransError),
            ]))
def generateProtFromAmbiguousDNA(self, s):
    """Return, per codon of ``s``, the list of amino acids it may encode.

    ``s`` must provide ``tostring()`` and ``len()``.  Codons are handled as
    follows:

    * ``---``                     -> ``['-']``
    * gap only in third position  -> the single amino acid fixed by the
      first two bases when there is one (e.g. ``GC-`` -> ``['A']``),
      otherwise ``['X']``
    * any other codon with gaps   -> ``['X']``
    * no gaps                     -> ``CodonTable.list_possible_proteins``
      against the "Standard" table, or ``['X']`` if that call raises.

    Returns a list with one list of residues per codon.
    """
    standard_nucleotide = CodonTable.unambiguous_dna_by_name["Standard"]
    non_standard_nucleotide = IUPACData.ambiguous_dna_values

    # First-two-base prefixes listed by the original code as determining the
    # amino acid regardless of the third base (RNA spelling).
    third_base_free = {
        'UC': 'S',
        'CU': 'L', 'CC': 'P', 'CG': 'R',
        'AC': 'T',
        'GU': 'V', 'GC': 'A', 'GG': 'G',
    }

    sequence = s.tostring()  # materialise once instead of once per codon
    aaTranslations = []
    for i in range(0, len(s), 3):
        codon = sequence[i:i + 3]
        gaps = codon.count('-')
        if gaps == 3:
            aa = ['-']
        elif gaps == 1 and codon.index('-') == 2:
            # The ungapped path below works on DNA, so accept T as well as U.
            prefix = codon[:2].replace('T', 'U')
            # An unmatched prefix used to leave ``aa`` unassigned, silently
            # re-appending the previous codon's result; treat it as unknown.
            aa = [third_base_free.get(prefix, 'X')]
        elif gaps > 0:
            aa = ['X']
        else:
            try:
                aa = CodonTable.list_possible_proteins(
                    codon, standard_nucleotide.forward_table, non_standard_nucleotide
                )
            except Exception:
                # Best effort: untranslatable codons become unknown residues.
                aa = ['X']
        aaTranslations.append(aa)
    return aaTranslations
def generateProtFromAmbiguousDNA(s):
    """Return every protein string that the ambiguous DNA ``s`` may encode.

    Each codon (three letters taken from ``s.tostring()``) is expanded with
    ``CodonTable.list_possible_proteins`` against the "Standard" table and
    the ambiguous DNA values.  The per-codon candidate lists, e.g.
    ``[[a], [b, c], [d]]``, are then combined in order into all possible
    sequences, e.g. ``['abd', 'acd']``.
    """
    std_nt = CodonTable.unambiguous_dna_by_name["Standard"]
    nonstd = IUPACData.ambiguous_dna_values

    # Candidate residues for each codon position.
    per_codon = [
        CodonTable.list_possible_proteins(
            s.tostring()[offset:offset + 3], std_nt.forward_table, nonstd
        )
        for offset in range(0, len(s), 3)
    ]

    # Cartesian product across positions, joined into strings.
    return ["".join(combo) for combo in itertools.product(*per_codon)]
# Build a codon table that can translate gapped codons.
# Start from the forward table of the standard DNA codon table.
# NOTE(review): no copy is taken, so the entries added below are written
# into the dict obtained from CodonTable.standard_dna_table itself --
# confirm that modifying the standard table is intended.
table = CodonTable.standard_dna_table.forward_table

# Any codon mixing gaps with bases translates to "X".
for c1 in ("A", "C", "G", "T", "N"):
    table[c1 + "--"] = "X"
    table["-" + c1 + "-"] = "X"
    table["--" + c1] = "X"
    for c2 in ("A", "C", "G", "T", "N"):
        table[c1 + c2 + "-"] = "X"
        table["-" + c1 + c2] = "X"
        table[c1 + "-" + c2] = "X"

# A codon made only of gaps stays a gap.
table["---"] = "-"

# Register the table under the name "gapped" and export its ambiguous form.
CodonTable.register_ncbi_table(
    name='gapped',
    alt_name="CAS0",
    id=99,
    table=table,
    stop_codons=['TAA', 'TAG', 'TGA'],
    start_codons=['TTG', 'CTG', 'ATG'],
)
GAPPED_CODON_TABLE = CodonTable.ambiguous_dna_by_name["gapped"]
\n\ %s\n" CMD_BLASTCLUST = "/ifs/home/c2b2/bh_lab/shares/blast/current/ia32-linux/bin/blastclust -p F -L .9 -S 95 -i %s -o %s" #pF: nucleotide; L.9: 90%[coverage] S: Identities PARSED_BLAST_HEADER = ["qid", "sid", "identity", "align_len", "mismatches", "gaps", "qstart", "qend", "sstart", "send", "evalue", "score", "strand","other_sids"] PARSED_BLAST_HEADER_VERBOSE = ["query_id", "sbjct_id", "strand", "evalue", "score", "identities", "gaps", "aln_len", "query_start", "query_end", "query_len", "sbjct_start", "sbjct_end", "aln_query", "aln_sbjct"] #make a codon table that can handle gaps #start by getting the standard codon table table = CodonTable.standard_dna_table.forward_table #add gaps for c1 in ["A", "C", "G", "T", "N"]: table["%s--"%c1] = "X" table["-%s-"%c1] = "X" table["--%s"%c1] = "X" for c2 in ["A", "C", "G", "T", "N"]: table["%s%s-"%(c1,c2)] = "X" table["-%s%s"%(c1,c2)] = "X" table["%s-%s"%(c1,c2)] = "X" table["---"]="-" #now register is and export CodonTable.register_ncbi_table(name='gapped',alt_name="CAS0",id=99,table=table, stop_codons=['TAA', 'TAG', 'TGA', ], start_codons=['TTG', 'CTG', 'ATG', ] ) GAPPED_CODON_TABLE=CodonTable.ambiguous_dna_by_name["gapped"]
def _translate_str(sequence, table, stop_symbol="*", to_stop=False, pos_stop="X"): """Helper function to translate a nucleotide string (PRIVATE). sequence - a string table - a CodonTable object (NOT a table name or id number) stop_symbol - a single character string, what to use for terminators. to_stop - boolean, should translation terminate at the first in frame stop codon? If there is no in-frame stop codon then translation continues to the end. pos_stop - a single character string for a possible stop codon (e.g. TAN or NNN) Returns a string. e.g. >>> from Bio.Data import CodonTable >>> table = CodonTable.ambiguous_dna_by_id[1] >>> _translate_str("AAA", table) 'K' >>> _translate_str("TAR", table) '*' >>> _translate_str("TAN", table) 'X' >>> _translate_str("TAN", table, pos_stop="@") '@' >>> _translate_str("TA?", table) Traceback (most recent call last): ... TranslationError: Codon 'TA?' is invalid """ sequence = sequence.upper() amino_acids = [] forward_table = table.forward_table stop_codons = table.stop_codons if table.nucleotide_alphabet.letters is not None: valid_letters = set(table.nucleotide_alphabet.letters.upper()) else: #Assume the worst case, ambiguous DNA or RNA: valid_letters = set(IUPAC.ambiguous_dna.letters.upper() + \ IUPAC.ambiguous_rna.letters.upper()) n = len(sequence) for i in xrange(0, n - n % 3, 3): codon = sequence[i:i + 3] try: amino_acids.append(forward_table[codon]) except (KeyError, CodonTable.TranslationError): #Todo? Treat "---" as a special case (gapped translation) if codon in table.stop_codons: if to_stop: break amino_acids.append(stop_symbol) elif valid_letters.issuperset(set(codon)): #Possible stop codon (e.g. NNN or TAN) amino_acids.append(pos_stop) else: raise CodonTable.TranslationError(\ "Codon '%s' is invalid" % codon) return "".join(amino_acids)
# print trans # if str(trans) == str(TolC): # print 'congrats, you reinvented the wheel' # else: # print 'you still suck' def reverse_translate_amino_acid(self, amino_acid): codon_options = self.amino_acid_to_codon_map[amino_acid] # Somehow make a choice. For now, just first one. codon = codon_options[0] # Return the one we chose. return codon if __name__ == '__main__': my_codon_table = CodonTable('standard_usage.txt') print my_codon_table.translate_codon('AAA') print my_codon_table.translate_sequence('ATGAAGAAATTGCTCCCCATT') #print len(my_codon_table.codon_to_amino_acid_map) #print my_codon_table.amino_acid_to_codon_map #print len(my_codon_table.amino_acid_to_codon_map) #print my_codon_table.amino_acid_to_codon_map['A'] #print my_codon_table.amino_acid_to_weight_map #print my_codon_table.translate_codon('AAA') #print my_codon_table.reverse_translate_amino_acid('F') #my_second_codon_table = CodonTable('weird_usage.txt')
def run_script(args):
    """Process a CSV of sequences according to ``args.mode[0]``.

    Reads rows from the CSV file ``args.scriptinput[0]`` and writes CSV rows
    to ``args.output[0]``.  In each input row, column 0 is passed through
    as a label and column 1 is the sequence.  Supported modes:

    * ``translate``    - replace T with U, then translate each complete
                         codon with the ``rEcoli-codon-usage.txt`` table.
    * ``revtranslate`` - reverse-translate each character, using the
                         ``rEcoli-codon-usage-1st20.txt`` table for the
                         first 20 characters and the main table afterwards;
                         U is replaced by T in the result.
    * ``transcribe``   - replace T with U.
    * ``calCUT``       - count every complete codon over all rows and write
                         one ``codon, amino acid, count`` row per codon.

    Any other mode writes nothing.  Both files are closed on exit, also
    when an error occurs part-way through.
    """
    with open(args.scriptinput[0]) as scriptinput:
        with open(args.output[0], 'w') as outputfh:
            output = csv.writer(outputfh)
            my_codon_table = CodonTable('rEcoli-codon-usage.txt')
            my_rare_codon_table = CodonTable('rEcoli-codon-usage-1st20.txt')
            scriptinputreader = csv.reader(scriptinput)
            mode = args.mode[0]

            if mode == 'translate':
                for row in scriptinputreader:
                    rna = row[1].replace('T', 'U')
                    # Stop at len - 2 so an incomplete trailing codon is skipped.
                    protein = ''.join(
                        my_codon_table.translate_codon(rna[i:i + 3])
                        for i in range(0, len(rna) - 2, 3)
                    )
                    output.writerow([row[0], protein])

            elif mode == 'revtranslate':
                for row in scriptinputreader:
                    pieces = []
                    for index, residue in enumerate(row[1]):
                        # The first 20 positions use the dedicated table.
                        if index < 20:
                            table = my_rare_codon_table
                        else:
                            table = my_codon_table
                        pieces.append(table.reverse_translate_codon(residue))
                    dna = ''.join(pieces).replace('U', 'T')
                    output.writerow([row[0], dna])

            elif mode == 'transcribe':
                for row in scriptinputreader:
                    output.writerow([row[0], row[1].replace('T', 'U')])

            elif mode == 'calCUT':
                calCUT = {}
                for row in scriptinputreader:
                    line = row[1]
                    for i in range(0, len(line) - 2, 3):
                        codon = line[i:i + 3]
                        calCUT[codon] = calCUT.get(codon, 0) + 1
                for codon in calCUT:
                    # The table is queried with the RNA spelling of the codon.
                    AA = my_codon_table.translate_codon(codon.replace('T', 'U'))
                    output.writerow([codon, AA, calCUT[codon]])