def translateSixFrame(seq):
    """Print the six-frame translation of ``seq``.

    The sequence is passed to the ``sixframes`` method of PyCogent's
    default genetic code and the result is written to stdout.

    Parameters
    ----------
    seq
        Value handed unchanged to ``standard_code.sixframes``
        (presumably a DNA string or cogent sequence -- confirm against
        callers).

    Returns
    -------
    None
        The translations are only printed, never returned.
    """
    # Kept as a function-scope import, as in the original, so cogent is
    # only required when this helper is actually called.
    from cogent.core.genetic_code import DEFAULT as standard_code

    translations = standard_code.sixframes(seq)
    # Parenthesised single-argument form prints identically under Python 2
    # and is also valid Python 3 syntax.
    print(translations)
def translateSixFrame(seq):
    """Print the six-frame translation of ``seq``.

    The sequence is passed to the ``sixframes`` method of PyCogent's
    default genetic code and the result is written to stdout.

    Parameters
    ----------
    seq
        Value handed unchanged to ``standard_code.sixframes``
        (presumably a DNA string or cogent sequence -- confirm against
        callers).

    Returns
    -------
    None
        The translations are only printed, never returned.
    """
    # Kept as a function-scope import, as in the original, so cogent is
    # only required when this helper is actually called.
    from cogent.core.genetic_code import DEFAULT as standard_code

    translations = standard_code.sixframes(seq)
    # Parenthesised single-argument form prints identically under Python 2
    # and is also valid Python 3 syntax.
    print(translations)
if not protein in dna_sequence_dic: nucleotide_not_found.append(protein) continue sequence_with_stop_codons = DNA.makeSequence(dna_sequence_dic[protein]) #Check if the sequence is the right one, and check for in frame stops #It seems that in JGI annotation, when scaffolds are joined, the resulted proteins do not match #the DNA sequence #Right now, I'll just remove those sequences, and deal with that later if len(sequence_with_stop_codons) % 3 == 0: seq_no_stop_codon = sequence_with_stop_codons.withoutTerminalStopCodon() #Chec for inframe stop codons stops_frame = standard_code.getStopIndices(seq_no_stop_codon, start=0) if len(stops_frame) > 0: inframe_stops.append([cluster, genome_id, protein_id]) else: curated_protein_list[protein] = seq_no_stop_codon else: frameshift_cases.append([cluster, genome_id, protein_id]) if len(curated_protein_list) < 2: # Only take those clusters with 3 sequences or more clusters_too_short.append(cluster) continue #Alignments and output data
continue sequence_with_stop_codons = DNA.makeSequence( dna_sequence_dic[protein]) #Check if the sequence is the right one, and check for in frame stops #It seems that in JGI annotation, when scaffolds are joined, the resulted proteins do not match #the DNA sequence #Right now, I'll just remove those sequences, and deal with that later if len(sequence_with_stop_codons) % 3 == 0: seq_no_stop_codon = sequence_with_stop_codons.withoutTerminalStopCodon( ) #Chec for inframe stop codons stops_frame = standard_code.getStopIndices(seq_no_stop_codon, start=0) if len(stops_frame) > 0: inframe_stops.append([cluster, genome_id, protein_id]) else: curated_protein_list[protein] = seq_no_stop_codon else: frameshift_cases.append([cluster, genome_id, protein_id]) if len(curated_protein_list ) < 2: # Only take those clusters with 3 sequences or more clusters_too_short.append(cluster) continue