def codon_counter(nt, codons, nt_type='dna'):
    """Tabulate relative codon usage per amino acid for the first in-frame CDS.

    The sequence is scanned in reading frame 0 (positions 0, 3, 6, ...) for
    the first ``ATG`` and then for the first ``TAG`` that follows it.  The
    codons from the start codon through the stop codon are grouped by amino
    acid using ``codons``.

    Args:
        nt: Nucleotide sequence, either a ``str`` or a ``Seq``.
        codons: Mapping of amino acid -> collection of codons encoding it.
        nt_type: ``'dna'`` (default) or ``'rna'``.  RNA input is
            back-transcribed to DNA before scanning.

    Returns:
        dict mapping each amino acid to a list of ``(codon, fraction)``
        tuples, where ``fraction`` is the number of times the codon was used
        divided by the total number of codons seen for that amino acid,
        rounded to 3 decimals.  Codons not present in ``codons`` are grouped
        under the key ``None``.

    Side effects:
        Prints the start/stop positions that were found and the result of
        ``GC123(nt_cds)``.
    """
    def get_key(val):
        # Grabs the key (aa) for the given value (codon).
        for key, value in codons.items():
            if val in value:
                return key

    # Handles an RNA string (or Seq) passed to the codon counter.
    if nt_type == 'rna':
        if not isinstance(nt, Seq):
            nt = Seq(nt)
        nt = nt.back_transcribe()

    start = None
    stop = None

    # Start and stop codons identified for the sequence.
    for frame in range(0, len(nt), 3):
        codon = nt[frame:frame + 3]
        # Compare against None explicitly: a start codon at position 0 is
        # falsy, so ``not start`` would let a later ATG overwrite it.
        if start is None:
            if codon == 'ATG':
                print(f'Start codon {codon} identified at position {frame}')
                start = frame
            # Ignore stop codons upstream of the start codon; accepting one
            # would produce an empty slice below.
            continue
        # mRNA-1273 contains all three stop codons at the end of the sequence.
        # TAG was the last one before the 3' UTR, so all stop codons end up
        # included in the codon table.
        if codon == 'TAG':
            print(f'Stop codon {codon} identified at position {frame}')
            stop = frame + 3
            break

    # Trimmed nt sequence starting at ATG and ending at TAG (a missing
    # boundary leaves that side of the sequence untrimmed).
    nt_cds = nt[start:stop]

    # Stores the codons used for each amino acid.
    codon_table = dict()
    for frame in range(0, len(nt_cds), 3):
        codon = nt_cds[frame:frame + 3]
        codon_table.setdefault(get_key(codon), []).append(str(codon))

    # Replace each codon list with (codon, times used / total codons for aa).
    # Only existing keys are reassigned, so the dict size does not change
    # while iterating.
    for aa, used in codon_table.items():
        total = len(used)
        codon_table[aa] = [
            (codon, round(used.count(codon) / total, 3))
            for codon in sorted(set(used))
        ]

    print(GC123(nt_cds))
    return codon_table
def main(): (opts, args) = getoptions() # Load PWMs pssms = load_motifs(opts.pwm_dir, opts.pseudocount) if opts.testseq is not None: if opts.seqtype == 'RNA': seq = Seq(opts.testseq, IUPAC.IUPACUnambiguousRNA()).back_transcribe() seq.alphabet = IUPAC.IUPACUnambiguousDNA() else: seq = Seq(opts.testseq, IUPAC.IUPACUnambiguousDNA()) final = scan_all(pssms, seq, opts) print final.to_csv(sep="\t", index=False) else: # Scan in sequence print >> sys.stderr, "Scanning sequences ", tic = time.time() for seqrecord in SeqIO.parse(open(args[0]), "fasta"): seq = seqrecord.seq if opts.seqtype == "RNA": seq = seq.back_transcribe() seq.alphabet = IUPAC.IUPACUnambiguousDNA() final = scan_all(pssms, seq, opts) print final.to_csv(sep="\t", index=False) toc = time.time() print >> sys.stderr, "done in %0.2f seconds!" % (float(toc - tic))
def web_pagina():
    """Classify the submitted sequence and render its conversions.

    The ``seq`` query parameter is upper-cased and tested with ``check_dna``,
    ``check_rna`` and ``check_eiwit`` in that order:

    * DNA: shows the transcribed RNA and the translated protein.
    * RNA: shows the back-transcribed DNA and the translated protein.
    * Protein: shows a hint plus a pre-filled NCBI blastp link.
    * Anything else: reports that the input is not DNA, RNA or protein.

    :return: The rendered web page for DNA, RNA or protein input.
    """
    seq = request.args.get("seq", '').upper()

    if check_dna(seq):
        bio_dna = Seq(seq, generic_dna)
        return render_template("afvink4.html", soort='DNA',
                               een=bio_dna.transcribe(),
                               twee=bio_dna.translate())

    if check_rna(seq):
        bio_rna = Seq(seq, generic_rna)
        # Every other branch renders "afvink4.html"; this one used to ask for
        # "avink4.html", which does not match and looks like a typo.
        return render_template("afvink4.html", soort='RNA',
                               een=bio_rna.back_transcribe(),
                               twee=bio_rna.translate())

    if check_eiwit(seq):
        return render_template(
            "afvink4.html", soort='Eiwit',
            een="Klik op de link en druk op BLAST.",
            twee="https://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastp&PAGE_TYPE=BlastSearch&QUERY="
            + str(seq))

    return render_template("afvink4.html", soort='Geen DNA, RNA of eiwit',
                           een='', twee='')
def main(): (opts, args) = getoptions() # Load PWMs pssms = load_motifs(opts.pwm_dir, opts.pseudocount) if opts.testseq is not None: if opts.seqtype == 'RNA': seq = Seq(opts.testseq, IUPAC.IUPACUnambiguousRNA()).back_transcribe() seq.alphabet = IUPAC.IUPACUnambiguousDNA() else: seq = Seq(opts.testseq, IUPAC.IUPACUnambiguousDNA()) final = scan_all(pssms, seq, opts) print final.to_csv(sep="\t", index = False) else: # Scan in sequence print >> sys.stderr, "Scanning sequences ", tic = time.time() for seqrecord in SeqIO.parse(open(args[0]), "fasta"): seq = seqrecord.seq if opts.seqtype == "RNA": seq = seq.back_transcribe() seq.alphabet = IUPAC.IUPACUnambiguousDNA() final = scan_all(pssms, seq, opts) print final.to_csv(sep="\t", index = False) toc = time.time() print >> sys.stderr, "done in %0.2f seconds!" % (float(toc - tic))
def manage_rna(data):
    """Derive the DNA, RNA and protein views of an mRNA sequence.

    ``data.sequence`` is read as unambiguous RNA.  From it the function
    builds the coding DNA (back-transcription), that DNA's complement and
    reverse complement, the RNA complement, and the protein translated with
    ``data.translation_table`` (both full-length and truncated at the first
    stop).  All values are stored as strings in a ``Processed_dna_rna`` that
    is handed to ``Sequencer.extract_sequence_data``.

    Returns:
        Whatever ``Sequencer.extract_sequence_data`` returns for the record.
    """
    rna = Seq(data.sequence, IUPAC.unambiguous_rna)
    # Seq objects are immutable, so one back-transcription serves all three
    # DNA-derived fields.
    dna = rna.back_transcribe()

    treated_data = Processed_dna_rna(
        creation_date=data.creation_date.strftime("%d/%m/%Y, %H:%M:%S"),
        translation_table=data.translation_table,
        coding_dna=str(dna),
        dna_c=str(dna.complement()),
        dna_rc=str(dna.reverse_complement()),
        rna_m=str(rna),
        rna_m_c=str(rna.complement()),
        protein=str(rna.translate(table=data.translation_table)),
        protein_to_stop=str(
            rna.translate(table=data.translation_table, to_stop=True)),
    )
    return Sequencer.extract_sequence_data(treated_data)
def get_switch_recognition_seq(trigger, sequence_type, length_unpaired):
    """Return the RNA reverse complement of a trigger sequence.

    The trigger is always interpreted as RNA: it is back-transcribed to DNA,
    reverse-complemented, and transcribed back to RNA.

    ``sequence_type`` and ``length_unpaired`` are accepted for interface
    compatibility but are not used by the current implementation (an earlier
    variant branched on ``sequence_type`` to treat DNA triggers separately).
    """
    rna_trigger = Seq(trigger, generic_rna)
    antisense_dna = rna_trigger.back_transcribe().reverse_complement()
    return antisense_dna.transcribe()
class RNA:
    """An RNA sequence loaded from a FASTA file or given directly.

    Attributes:
        input: The constructor argument as passed in (path/handle or sequence).
        sequence: The sequence as a ``Seq`` object.
    """

    def __init__(self, input, path=True):
        """Load the sequence.

        Args:
            input: Path (or handle) of a FASTA file when ``path`` is true,
                otherwise the sequence itself (anything ``str()`` accepts).
            path: Whether ``input`` refers to a FASTA file.

        Raises:
            ValueError: If ``path`` is true and the FASTA file has no records.
        """
        self.input = input
        if path:
            # SeqIO.parse returns a lazy iterator of SeqRecord objects, which
            # has neither translate() nor back_transcribe().  Store the
            # sequence of the first record so the methods below work.
            record = next(SeqIO.parse(input, 'fasta'), None)
            if record is None:
                raise ValueError(
                    'No FASTA record found in {!r}'.format(input))
            self.sequence = record.seq
        else:
            self.sequence = Seq(str(input))

    def do_translation(self):
        """Return the protein translation of the sequence."""
        return self.sequence.translate()

    def do_reverse_transcription(self):
        """Return the DNA (back-transcribed) form of the sequence."""
        return self.sequence.back_transcribe()
def web_pagina():
    """Fetch the submitted sequence and show the conversions that fit it.

    The ``seq`` query parameter is upper-cased and tested with ``check_dna``,
    ``check_rna`` and ``check_eiwit`` in that order; the first check that
    succeeds decides what is shown.

    :return: The rendered web application page.
    """
    seq = request.args.get("seq", '')
    seq = seq.upper()

    if check_dna(seq):
        # DNA: report the type plus the matching RNA and protein strands.
        bio_dna = Seq(seq, generic_dna)
        soort = 'DNA'
        een = bio_dna.transcribe()
        twee = bio_dna.translate()
    elif check_rna(seq):
        # RNA: report the type plus the matching DNA and protein strands.
        bio_rna = Seq(seq, generic_rna)
        soort = 'RNA'
        een = bio_rna.back_transcribe()
        twee = bio_rna.translate()
    elif check_eiwit(seq):
        # Protein: report the type and link to the NCBI site with the query
        # pre-filled so the protein sequence can be BLASTed.
        soort = 'Eiwit'
        een = "klik op de link en druk op blast"
        twee = ("https://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastp&PAGE_TYPE=BlastSearch&QUERY="
                + str(seq))
    else:
        # Neither DNA, RNA nor protein.
        soort = 'Geen DNA, RNA of eiwit'
        een = ''
        twee = ''

    return render_template("Afvink4.html", soort=soort, een=een, twee=twee)
def transcribe(records, transcribe):
    """Yield transcribed or back-transcribed copies of sequence records.

    Args:
        records: Iterable of records exposing ``seq``, ``id`` and
            ``description``.
        transcribe: Operation to perform, one of:

            * ``'dna2rna'`` - read each sequence as ambiguous IUPAC DNA and
              transcribe it to RNA.
            * ``'rna2dna'`` - read each sequence as ambiguous IUPAC RNA and
              back-transcribe it to DNA.

    Yields:
        A new ``SeqRecord`` per input record carrying the converted sequence
        and the original id and description.  Nothing is yielded for any
        other ``transcribe`` value.
    """
    logging.info('Applying _transcribe generator: '
                 'operation to perform is ' + transcribe + '.')
    for record in records:
        text = str(record.seq)
        description = record.description
        name = record.id

        if transcribe == 'dna2rna':
            converted = Seq(text, IUPAC.ambiguous_dna).transcribe()
        elif transcribe == 'rna2dna':
            converted = Seq(text, IUPAC.ambiguous_rna).back_transcribe()
        else:
            # Unknown operation: the record is skipped silently.
            continue

        yield SeqRecord(converted, id=name, description=description)
def transcribe(records, transcribe):
    """Yield transcribed or back-transcribed copies of sequence records.

    Args:
        records: Iterable of records exposing ``seq``, ``id`` and
            ``description``.
        transcribe: Operation to perform, one of:

            * ``'dna2rna'`` - read each sequence with the generic DNA
              alphabet and transcribe it to RNA.
            * ``'rna2dna'`` - read each sequence with the generic RNA
              alphabet and back-transcribe it to DNA.

    Yields:
        A new ``SeqRecord`` per input record carrying the converted sequence
        and the original id and description.  Nothing is yielded for any
        other ``transcribe`` value.
    """
    logging.info('Applying _transcribe generator: '
                 'operation to perform is ' + transcribe + '.')
    for record in records:
        text = str(record.seq)
        description = record.description
        name = record.id

        if transcribe == 'dna2rna':
            converted = Seq(text, generic_dna).transcribe()
        elif transcribe == 'rna2dna':
            converted = Seq(text, generic_rna).back_transcribe()
        else:
            # Unknown operation: the record is skipped silently.
            continue

        yield SeqRecord(converted, id=name, description=description)
def getSeed(mature_miRNA):
    """Build the DNA target-site strings for a mature miRNA.

    Takes a mature miRNA sequence (RNA) and returns the site variants below.
    The site is the last 8 nt of the reverse complement of the
    back-transcribed miRNA; for a 5'->3' miRNA of at least 8 nt that is the
    stretch pairing with miRNA positions 8..1:

        mRNA     | | N N N N N N N N | | |
        miRNA        8 7 6 5 4 3 2 1
        8mer     | | N N N N N N N A | | |
        7mer-m8  | | N N N N N N N | | | |
        7mer-A1  | | | N N N N N N A | | |
        6mer     | | | N N N N N N | | | |

    Returns:
        dict with keys ``"8mer"``, ``"7mer-m8"``, ``"7mer1a"`` and ``"6mer"``
        mapping to the corresponding site strings.
    """
    from Bio.Seq import Seq
    from Bio.Alphabet import generic_rna

    target_dna = Seq(mature_miRNA,
                     generic_rna).back_transcribe().reverse_complement()
    site = str(target_dna[-8:])

    # Drop the last base (opposite miRNA position 1); the 8mer and 7mer-A1
    # sites put a fixed "A" there instead.
    seed = site[:-1]
    # The six bases left after also dropping the first base of ``seed``.
    core = seed[1:7]

    return {
        "8mer": seed + "A",
        "7mer-m8": seed,
        "7mer1a": core + "A",
        "6mer": core,
    }
def write_output_records(args, p_lncrna_w_tfos):
    """Write both DNA strands of every TFO to a FASTA file.

    For each TFO of each lncRNA, the RNA sequence is back-transcribed to DNA.
    Two records sharing the same id are written: first the complement of that
    DNA, then the DNA itself.  The id is ``id_lncrna|dg|tm`` with ``dg`` and
    ``tm - 273.15`` rounded to 2 decimals (presumably a Kelvin -> Celsius
    conversion; confirm against the producer of ``tfo.thermo``).

    Args:
        args: Mapping whose ``'o'`` entry is the output file.
        p_lncrna_w_tfos: Iterable of lncRNA objects exposing ``lnctfos``.

    Side effects:
        Writes the FASTA file and prints the number of TFOs processed.
    """
    out_file = args['o']
    out_records = []
    total_found = 0

    for lncrna in p_lncrna_w_tfos:
        for tfo in lncrna.lnctfos:
            id_f = "|".join([
                tfo.id_lncrna,
                str(round(tfo.thermo["dg"], 2)),
                str(round(tfo.thermo["tm"] - 273.15, 2)),
            ])
            back_transc = Seq(tfo.seq, IUPAC.unambiguous_rna).back_transcribe()

            # All records are kept in memory until the final write; if the
            # list gets too large a different solution should be implemented.
            out_records.append(
                SeqRecord(back_transc.complement(), id=id_f, description=""))
            out_records.append(
                SeqRecord(back_transc, id=id_f, description=""))
            total_found += 1

    SeqIO.write(out_records, out_file, "fasta")
    print("\nDone! Found a total of {0} possible TFOs\n".format(total_found))
#reproduzindo processo de tradução from Bio.Seq import Seq mySeq = Seq("ATG") #traduzir uma sequencia de rna mensageiro em uma sequencia de proteinas #transcrição seqRNA = mySeq.transcribe() seqDNA = mySeq.back_transcribe() #tradução seqProteineRNA = seqRNA.translate() print(seqProteineRNA) seqProteineDNA = seqDNA.translate() print(seqProteineDNA)
# Scan every UTR for windows that match the reverse complement of each miRNA
# seed with at most ``nb_max_mismatch`` mismatches.  Hits are written to
# ``output_file`` as tab-separated rows and collected as GraphicFeature
# objects for plotting.
# NOTE(review): f1 is not closed in this section; confirm it is closed later.
f1 = open(output_file, 'w')
slen = seed_size + 1
features = []
for miR in SeqIO.parse(mirna, "fasta"):
    # The seed starts at the second base of the miRNA and is seed_size long.
    mir_seed = Seq(str(miR.seq)[1:slen], generic_rna)  # "AAGGCAC"
    print(miR.name)
    print(str(mir_seed))

    # These depend only on the miRNA, so compute them once per miRNA rather
    # than once (or twice) per window.
    seed_len = len(str(mir_seed))
    target_site = str(mir_seed.back_transcribe().reverse_complement())
    label = re.sub(r'mmu-', '', miR.id)

    for utr in SeqIO.parse(utr_Database, "fasta"):
        # NOTE(review): pos is never advanced in this section, so every hit
        # is reported at position 0 -- confirm whether it should follow the
        # window offset.
        pos = 0
        for seq in window(str(utr.seq), seed_len):
            candidate = str(seq.upper())
            mismatches = hamming2(candidate, target_site)
            if mismatches <= nb_max_mismatch:
                f1.write(utr.id + "\t" + str(pos) + "\t" +
                         str(pos + seed_len) + "\t" + miR.id + "\t" +
                         str(mismatches) + "\t" + "+" + "\t" +
                         candidate + "\n")
                features.append(
                    GraphicFeature(start=pos,
                                   end=pos + seed_len,
                                   strand=+1,
                                   color="#ccccff",
                                   label=label))
print(f"Sequencia complementar: {seq_comp}") #sequencia complementar reversa normal 5'--- 3' reversa para 3'--- 5'- Seq DNA seq_reverse_comp = my_seq.reverse_complement() print(f"Sequencia complementar reversa: {seq_reverse_comp}") #Processo de transcricao seq_rna = my_seq.transcribe() print(f"Sequencia RNA: {seq_rna}") #Processo de transcricao reverso seq_rna_rev = my_seq.back_transcribe() print(f"Sequencia RNA reversa: {seq_rna_rev}") #Processo de traducao RNA --> (Aminoacido) Proteinas seq_proteina_rna = seq_rna.translate() seq_proteina_dna = my_seq.translate() print(f"Sequencia proteina form RNA: {seq_proteina_rna}") print(f"Sequencia proteina form DNA: {seq_proteina_dna}") #Analise arquivos FASTA for fasta in SeqIO.parse("./seq.fasta","fasta"):
# Load the Biopython sequence class.
from Bio.Seq import Seq

# Define the DNA sequence (randomly made).
dna_str = "atgcgcgctagatcgatagta"
sequence = Seq(dna_str)

# Derive the other molecule types from the DNA.
# Transcription step: DNA -> RNA.
RNAfromDNA_str = sequence.transcribe()
# Back-transcription: RNA -> DNA.
DNAfromRNA_str = RNAfromDNA_str.back_transcribe()
# Translation: RNA -> protein.
PROTfromRNA_str = RNAfromDNA_str.translate()

# Print the results.
print("\t 1 Original DNA :", dna_str, ", length is :", len(dna_str))
print("\t 1 RNA from DNA :", RNAfromDNA_str)
print("\t 1 DNA from RNA :", DNAfromRNA_str)
print("\t 1 PROTEIN from RNA :", PROTfromRNA_str)

##################################################
# new sequence
##################################################
dna_str = "atgcgcgctagattcgatagta"
class SgRna:
    """Holds information about a single guide RNA.

    Attributes:
        protospacer(Bio.Seq): sequence of the protospacer (sans constant
            portion), always stored as RNA. Can only be set on
            initialization.
        target_site(GenomicLocation): location targeted by the protospacer
        target_seq(str): sequence window +/- 10 bases around protospacer
            (can be used to find PAM)
        offtarget_sites{GenomicLocation: [geneid1, geneid2,...]}: holds info
            about potential offtarget sites found in genome of interest,
            including if those offtargets fall within genes
        pam(str): protospacer adjacent motif for this guide
        constant_region(Bio.Seq): constant region associated with this guide.
            Used to calculate secondary structure.
        score(float): score of this guide
    """

    def __init__(self, seq,
                 constant_region="GUUUUAGAGCUAGAAAUAGCAAGUUAAAAUAAGGCUAGUCCGUUAUCAACUUGAAAAAGUGGCACCGAGUCGGUGCUUUUUU",
                 target_site=None, target_seq="", pam=""):
        """Create a guide from a protospacer given as a DNA or RNA string.

        Args:
            seq: Protospacer sequence string; any "T" is converted to "U".
            constant_region: RNA constant region appended to the protospacer.
            target_site: Location targeted by the protospacer, if known.
            target_seq: Sequence context around the protospacer.
            pam: Protospacer adjacent motif, if already known.
        """
        # weissman constant = "GUUUAAGAGCUAAGCUGGAAACAGCAUAGCAAGUUUAAAUAAGGCUAGUCCGUUAUCAACUUGAAAAAGUGGCACCGAGUCGGUGCUUUUUUU"
        # broad constant = "GUUUUAGAGCUAGAAAUAGCAAGUUAAAAUAAGGCUAGUCCGUUAUCAACUUGAAAAAGUGGCACCGAGUCGGUGCUUUUUU"

        # Turn DNA input into RNA.  str.find() returns -1 (truthy) when "T"
        # is absent and 0 (falsy) when the sequence *starts* with "T", so it
        # cannot serve as a presence test; replace() is already a no-op for
        # sequences without "T", so no test is needed at all.
        seq_copy = seq.replace("T", "U")

        # Sequence sans constant portion. Can only set protospacer on
        # initialization. Always stored as RNA.
        self.protospacer = Seq(seq_copy, generic_rna)
        # Will eventually become a GenomicLocation. strand (+ means sgrna seq
        # same as + strand, - means sgrna seq same as - strand).
        self.target_site = target_site
        self.target_seq = target_seq  # 10 bases on either side of site
        # dict, format = {GenomicLocation: [gene1, gene2, gene3...]}
        self.offtarget_sites = {}
        self.pam = pam
        self.constant_region = Seq(constant_region, generic_rna)
        self.score = 0

    def __eq__(self, other):
        """Compare guides field by field (``pam`` is not part of the key)."""
        return ((self.protospacer, self.target_site, self.target_seq,
                 self.offtarget_sites, self.constant_region, self.score) ==
                (other.protospacer, other.target_site, other.target_seq,
                 other.offtarget_sites, other.constant_region, other.score))

    def __ne__(self, other):
        return not self == other

    def build_fullseq(self):
        """Return protospacer + constant region, with a leading "G" ensured."""
        fullseq = self.protospacer + self.constant_region
        if fullseq[0] != "G":
            fullseq = "G" + fullseq
        return fullseq

    def build_protospacerpam(self):
        """Return the protospacer followed by the PAM, as DNA.

        Returns:
            * protospacer + ``self.pam`` as a DNA ``Seq`` when the PAM is
              already known;
            * the protospacer alone (DNA) when there is no ``target_seq`` to
              look the PAM up in (a message is printed);
            * ``""`` when the protospacer is found neither in ``target_seq``
              nor in its reverse complement (a message is printed);
            * otherwise protospacer + the PAM sliced from the target.
        """
        p = self.protospacer.back_transcribe()  # DNA form of the protospacer

        if self.pam != "":
            return Seq(str(p) + str(self.pam), generic_dna)

        if self.target_seq == "":
            print("Can't build PAM without sequence context. Defaulting to protospacer %s" % p)
            return p

        temp_target = self.target_seq
        index = temp_target.find(p)
        if index == -1:
            # Not on this strand: try the opposite one.
            temp_target = temp_target.reverse_complement()
            index = temp_target.find(p)
        if index == -1:
            print("Can't find protospacer in target sequence")
            return ""

        # NOTE(review): self.pam is always "" at this point (a known PAM
        # returned above), so len(self.pam) is 0 and this slice is empty.
        # The PAM length is not defined anywhere in this class -- confirm the
        # intended length before changing the slice.
        pam_start = index + len(p)
        self.pam = temp_target[pam_start:pam_start + len(self.pam)]
        return Seq(str(p) + str(self.pam), generic_dna)
def retrotranscription(self):
    """Back-transcribe the stored RNA sequence to DNA.

    ``self.sequence`` is read as unambiguous IUPAC RNA.

    Returns:
        tuple: ``(True, dna_string, self.quality)``.
    """
    rna = Seq(self.sequence, IUPAC.unambiguous_rna)
    dna = rna.back_transcribe()
    return True, str(dna), self.quality
# Interactive-style tour of basic Seq operations.  The bare expressions below
# only show a value when run in a REPL or notebook; as a script they are
# evaluated and discarded.
import Bio

Bio.__version__  # installed Biopython version
from Bio.Seq import Seq

s1 = Seq('ATGGCTTTATTTTCCCGGGA')
s1.complement()  # complementary strand
s1.reverse_complement()  # reverse-complementary strand
s1.transcribe()  # DNA -> RNA
s1.back_transcribe()  # RNA -> DNA
# s1 is given without any "U", and the comparison checks that
# back-transcribing it yields the same sequence.
s1.back_transcribe() == Seq('ATGGCTTTATTTTCCCGGGA')
s1.translate()  # nucleotides -> protein
def __init__(self, sequence, origin_id, host_id, translation_table_origin=1, translation_table_host=1,
             use_frequency=False, lower_threshold=None, strong_stop=True, lower_alternative=True,
             use_replacement_table=True, use_highest_frequency_if_ambiguous=True):
    """
    Initialize the Sequence object

    sequence - DNA or RNA sequence as Bio.Seq object or string. This can for example be generated by
               using BioPython directly or by loading a FASTA file using LibCharm.IO.load_file.
               Strings are stripped of whitespace, upper-cased and, when they contain U but no T,
               back-transcribed to DNA. Any other object is stored unchanged.
    origin_id - Species id of the origin organism (can be found in the URL at http://www.kazusa.or.jp/codon)
    host_id - Species id of the host organism (can be found in the URL at http://www.kazusa.or.jp/codon)
    translation_table_origin - Integer; Genetic code used by the origin organism. Corresponds to one of the
                               translation tables listed here:
                               http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
    translation_table_host - Integer; Genetic code used by the target host organism. Corresponds to one of the
                             translation tables listed here:
                             http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
    use_frequency - Boolean; Use frequency per thousand instead of fraction during the assessment of the
                    codon usage
    lower_threshold - Float; Threshold that defines the minimum codon usage that is considered appropriate.
                      By default, a harmonized codon can only be lower than this threshold, if the original
                      codon usage in the original organism is lower than this threshold, too. When omitted
                      (or falsy) it defaults to 5 if use_frequency is set and to 0.1 otherwise.
    strong_stop - Boolean; Defines whether a strong stop codon (e.g. TAA in bacterial hosts) should be used.
                  This may cause the stop codon not to be perfectly harmonized.
    lower_alternative - Boolean; Defines whether the lower or higher usage codon should be used if df for two
                        alternative codons is equal.
    use_replacement_table - Boolean; If true, do not compute the harmonization for every single codon in the
                            sequence, but for every unique codon in the sequence. This is done by default as
                            it is much faster.
    use_highest_frequency_if_ambiguous - Boolean: If the sequence contains ambiguous codons (e.g. GCN), always
                                         assume that the most frequent unambiguous codon is used. If set to
                                         'False', the least frequent unambiguous codon will be used.

    Raises ValueError if either translation table id is greater than 15. Construction fetches the codon
    usage tables of both organisms from http://www.kazusa.or.jp/codon and harmonizes the sequence.
    """
    # setting threshold if provided, otherwise fall back to defaults
    if not lower_threshold:
        self.lower_threshold = 5 if use_frequency else 0.1
    else:
        self.lower_threshold = lower_threshold

    # set other variables to provided values or defaults
    self.strong_stop = strong_stop
    self.lower_alternative = lower_alternative
    self.use_replacement_table = use_replacement_table
    self.use_frequency = use_frequency
    self.use_highest_frequency_if_ambiguous = use_highest_frequency_if_ambiguous

    # generate a list of ambiguous DNA letters only (IUPACData.ambiguous_dna_letters also includes the
    # unambiguous G, C, A and T).
    self.ambiguous_dna_letters = list(
        set(IUPACData.ambiguous_dna_letters) - set(IUPACData.unambiguous_dna_letters))

    # check if translation table id is > 15. Values > 15 cannot be mapped to http://www.kazusa.or.jp/codon/!
    if translation_table_origin > 15 or translation_table_host > 15:
        raise ValueError('Though the NCBI lists more than 15 translation tables, CHarm is limited to the '
                         'first 15 as listed on \'http://www.kazusa.or.jp/codon/\'.')

    # Set translation table for original sequence
    self.translation_table_origin = CodonTable.ambiguous_dna_by_id[int(translation_table_origin)]
    self.translation_table_host = CodonTable.ambiguous_dna_by_id[int(translation_table_host)]

    # Reformat and sanitize sequence string (remove whitespaces, change to uppercase).
    # isinstance() is required here: comparing type(sequence) with the *string* 'str' is never true,
    # which left string input unsanitized and RNA strings unconverted.
    if isinstance(sequence, str):
        try:
            cleaned = ''.join(sequence.upper().split())
            # check the sanitized text for U and not T to distinguish between RNA and DNA, so that
            # lower-case input is classified correctly as well
            if 'U' in cleaned and 'T' not in cleaned:
                # if RNA, convert to DNA alphabet
                self.original_sequence = Seq(cleaned, IUPAC.ambiguous_rna).back_transcribe()
            else:
                self.original_sequence = Seq(cleaned, IUPAC.ambiguous_dna)
        except ValueError as error:
            print('ERROR: {}'.format(error))
            exit(1)
    else:
        self.original_sequence = sequence

    # Translate original DNA sequence to amino acid sequence
    self.original_translated_sequence = self.translate_sequence(self.original_sequence,
                                                                self.translation_table_origin,
                                                                cds=True)

    # Initialize empty harmonize sequence
    self.harmonized_sequence = ''

    # Fetch codon usage tables for original and host organism
    self.usage_origin = CodonUsageTable('http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?'
                                        'species={}&aa={}&style=N'.format(origin_id,
                                                                          translation_table_origin),
                                        self.use_frequency)
    self.usage_host = CodonUsageTable('http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?'
                                      'species={}&aa={}&style=N'.format(host_id,
                                                                        translation_table_host),
                                      self.use_frequency)

    # Split DNA sequence into list of codons
    self.codons = self.split_original_sequence_to_codons()

    # Harmonize codon usage
    self.harmonize_codons()

    # Construct new sequence out of harmonized codons
    self.harmonized_sequence = self.construct_new_sequence()

    # Translate harmonized DNA sequence to amino acid sequence
    self.harmonized_translated_sequence = self.translate_sequence(self.harmonized_sequence,
                                                                  self.translation_table_host,
                                                                  cds=True)
print("\t User picks option: nt") if orient_str == '3-5': print("\t nt and 3'- 5'") dnaSeq_str = dnaSeq_str.reverse_complement() elif orient_str == '5-3': print("\t nt and 5'-3'") dnaSeq_str = dnaSeq_str.complement() else: print("\t Unknown primality") exit() else: print("\t Unknown template type") exit() print("\t + End of DNA Manipulation algorithm. DNASeq is: ", dnaSeq_str, "\n") # if you want to add some translation functionality ... print("\t __Translation__") sequence = dnaSeq_str RNAfromDNA_str = Seq.transcribe(sequence) # gives RNA sequence DNAfromRNA_str = Seq.back_transcribe( RNAfromDNA_str) # gives DNA sequence from the RNA conversion PROTfromRNA_str = Seq.translate(RNAfromDNA_str) print(" Original DNA :", dnaSeq_str) print(" RNA from DNA :", RNAfromDNA_str) print(" DNA from RNA :", DNAfromRNA_str) print(" PROT from RNA :", PROTfromRNA_str) print(" End of program!")
def back_transcribe(self):
    """Return a ``Dna`` built from the back-transcribed ``self.string``."""
    dna_text = str(Seq(self.string).back_transcribe())
    return Dna(dna_text)
def __init__(self, sequence, origin_id, host_id, translation_table_origin=1, translation_table_host=1,
             use_frequency=False, lower_threshold=None, strong_stop=True, lower_alternative=True,
             use_replacement_table=True, use_highest_frequency_if_ambiguous=True):
    """
    Initialize the Sequence object

    sequence - DNA or RNA sequence as Bio.Seq object or string. This can for example be generated by
               using BioPython directly or by loading a FASTA file using LibCharm.IO.load_file.
               Strings are stripped of whitespace, upper-cased and, when they contain U but no T,
               back-transcribed to DNA. Any other object is stored unchanged.
    origin_id - Species id of the origin organism (can be found in the URL at http://www.kazusa.or.jp/codon)
    host_id - Species id of the host organism (can be found in the URL at http://www.kazusa.or.jp/codon)
    translation_table_origin - Integer; Genetic code used by the origin organism. Corresponds to one of the
                               translation tables listed here:
                               http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
    translation_table_host - Integer; Genetic code used by the target host organism. Corresponds to one of the
                             translation tables listed here:
                             http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
    use_frequency - Boolean; Use frequency per thousand instead of fraction during the assessment of the
                    codon usage
    lower_threshold - Float; Threshold that defines the minimum codon usage that is considered appropriate.
                      By default, a harmonized codon can only be lower than this threshold, if the original
                      codon usage in the original organism is lower than this threshold, too. When omitted
                      (or falsy) it defaults to 5 if use_frequency is set and to 0.1 otherwise.
    strong_stop - Boolean; Defines whether a strong stop codon (e.g. TAA in bacterial hosts) should be used.
                  This may cause the stop codon not to be perfectly harmonized.
    lower_alternative - Boolean; Defines whether the lower or higher usage codon should be used if df for two
                        alternative codons is equal.
    use_replacement_table - Boolean; If true, do not compute the harmonization for every single codon in the
                            sequence, but for every unique codon in the sequence. This is done by default as
                            it is much faster.
    use_highest_frequency_if_ambiguous - Boolean: If the sequence contains ambiguous codons (e.g. GCN), always
                                         assume that the most frequent unambiguous codon is used. If set to
                                         'False', the least frequent unambiguous codon will be used.

    Raises ValueError if either translation table id is greater than 15. Construction fetches the codon
    usage tables of both organisms from http://www.kazusa.or.jp/codon and harmonizes the sequence.
    """
    # setting threshold if provided, otherwise fall back to defaults
    if not lower_threshold:
        self.lower_threshold = 5 if use_frequency else 0.1
    else:
        self.lower_threshold = lower_threshold

    # set other variables to provided values or defaults
    self.strong_stop = strong_stop
    self.lower_alternative = lower_alternative
    self.use_replacement_table = use_replacement_table
    self.use_frequency = use_frequency
    self.use_highest_frequency_if_ambiguous = use_highest_frequency_if_ambiguous

    # generate a list of ambiguous DNA letters only (IUPACData.ambiguous_dna_letters also includes the
    # unambiguous G, C, A and T).
    self.ambiguous_dna_letters = list(
        set(IUPACData.ambiguous_dna_letters) - set(IUPACData.unambiguous_dna_letters))

    # check if translation table id is > 15. Values > 15 cannot be mapped to http://www.kazusa.or.jp/codon/!
    if translation_table_origin > 15 or translation_table_host > 15:
        raise ValueError('Though the NCBI lists more than 15 translation tables, CHarm is limited to the '
                         'first 15 as listed on \'http://www.kazusa.or.jp/codon/\'.')

    # Set translation table for original sequence
    self.translation_table_origin = CodonTable.ambiguous_dna_by_id[int(translation_table_origin)]
    self.translation_table_host = CodonTable.ambiguous_dna_by_id[int(translation_table_host)]

    # Reformat and sanitize sequence string (remove whitespaces, change to uppercase).
    # isinstance() is required here: comparing type(sequence) with the *string* 'str' is never true,
    # which left string input unsanitized and RNA strings unconverted.
    if isinstance(sequence, str):
        try:
            cleaned = ''.join(sequence.upper().split())
            # check the sanitized text for U and not T to distinguish between RNA and DNA, so that
            # lower-case input is classified correctly as well
            if 'U' in cleaned and 'T' not in cleaned:
                # if RNA, convert to DNA alphabet
                self.original_sequence = Seq(cleaned, IUPAC.ambiguous_rna).back_transcribe()
            else:
                self.original_sequence = Seq(cleaned, IUPAC.ambiguous_dna)
        except ValueError as error:
            print('ERROR: {}'.format(error))
            exit(1)
    else:
        self.original_sequence = sequence

    # Translate original DNA sequence to amino acid sequence
    self.original_translated_sequence = self.translate_sequence(self.original_sequence,
                                                                self.translation_table_origin,
                                                                cds=True)

    # Initialize empty harmonize sequence
    self.harmonized_sequence = ''

    # Fetch codon usage tables for original and host organism
    self.usage_origin = CodonUsageTable('http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?'
                                        'species={}&aa={}&style=N'.format(origin_id,
                                                                          translation_table_origin),
                                        self.use_frequency)
    self.usage_host = CodonUsageTable('http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?'
                                      'species={}&aa={}&style=N'.format(host_id,
                                                                        translation_table_host),
                                      self.use_frequency)

    # Split DNA sequence into list of codons
    self.codons = self.split_original_sequence_to_codons()

    # Harmonize codon usage
    self.harmonize_codons()

    # Construct new sequence out of harmonized codons
    self.harmonized_sequence = self.construct_new_sequence()

    # Translate harmonized DNA sequence to amino acid sequence
    self.harmonized_translated_sequence = self.translate_sequence(self.harmonized_sequence,
                                                                  self.translation_table_host,
                                                                  cds=True)
# Python 2 walkthrough of Biopython alphabets and Seq basics.
print IUPAC.ambiguous_dna.letters  # IUPAC letters for DNA bases (including ambiguity codes)
print IUPAC.ExtendedIUPACProtein.letters  # letters of the extended IUPAC protein alphabet
print IUPAC.ExtendedIUPACDNA.letters  # letters of the extended IUPAC DNA alphabet
from Bio.Seq import Seq
seq = Seq('CCGGTT',IUPAC.unambiguous_dna)
print seq
seq=seq.transcribe()  # the sequence must be DNA to be transcribed to RNA
print seq
seq=seq.translate()  # seq holds RNA at this point; translate it to protein
print seq
# Sequence data type
seq=Seq('CCGGUU',IUPAC.IUPACUnambiguousRNA())  # alphabet given as an instance of the IUPAC...RNA class
print seq
print seq.back_transcribe()  # the sequence must be RNA to be back-transcribed to DNA
seq=Seq('ATGGTCTTTCCAGACGCG',IUPAC.unambiguous_dna)
print Seq.transcribe(seq)  # called as a function; the calls above use it as a method
print seq[:5]  # supports string-like operations such as slicing
print len(seq)
# seq[0]='C'  # not allowed: Seq objects are immutable
st=str(seq)  # convert to a plain string
print st
# Editable (mutable) sequence data type
from Bio.Seq import MutableSeq
mut_seq=seq.tomutable()  # convert it to a mutable sequence
print mut_seq
mut_seq[0]='C'