def translate_seqs(seq_iter, gap_characters=None, **kwargs):
    '''
    Translates DNA or RNA sequences into amino acid sequences.

    This function returns a generator that yields a copy of each input
    sequence translated into an amino acid sequence (all `gap_characters`
    are removed from the sequence prior to translation).

    `gap_characters` is the collection of characters treated as gaps. If it
    is omitted or None, ['-'] is used; the list is created anew each time
    the generator runs, so no default object is shared between calls.

    `kwargs` are keyword arguments that are passed to the Seq.translate
    method of biopython to translate each sequence. Below is biopython's
    description of valid keyword arguments:

    - table - Which codon table to use? This can be either a name (string),
      an NCBI identifier (integer), or a CodonTable object (useful for
      non-standard genetic codes). This defaults to the "Standard" table.
    - stop_symbol - Single character string, what to use for terminators.
      This defaults to the asterisk, "*".
    - to_stop - Boolean, defaults to False meaning do a full translation
      continuing on past any stop codons (translated as the specified
      stop_symbol). If True, translation is terminated at the first in
      frame stop codon (and the stop_symbol is not appended to the returned
      protein sequence).
    - cds - Boolean, indicates this is a complete CDS. If True, this checks
      the sequence starts with a valid alternative start codon (which will
      be translated as methionine, M), that the sequence length is a
      multiple of three, and that there is a single in frame stop codon at
      the end (this will be excluded from the protein sequence, regardless
      of the to_stop option). If these tests fail, an exception is raised.
    '''
    # A list literal in the signature would be one object shared by every
    # call; build the default here instead.
    if gap_characters is None:
        gap_characters = ['-']
    for s in remove_gaps(seq_iter, gap_characters=gap_characters):
        yield sequtils.get_translation(seq_record=s, **kwargs)
def translate_seqs(seq_iter, gap_characters=None, **kwargs):
    '''
    Translates DNA or RNA sequences into amino acid sequences.

    This function returns a generator that yields a copy of each input
    sequence translated into an amino acid sequence (all `gap_characters`
    are removed from the sequence prior to translation).

    `gap_characters` is the collection of characters treated as gaps. If it
    is omitted or None, ['-'] is used; the list is created anew each time
    the generator runs, so no default object is shared between calls.

    `kwargs` are keyword arguments that are passed to the Seq.translate
    method of biopython to translate each sequence. Below is biopython's
    description of valid keyword arguments:

    - table - Which codon table to use? This can be either a name (string),
      an NCBI identifier (integer), or a CodonTable object (useful for
      non-standard genetic codes). This defaults to the "Standard" table.
    - stop_symbol - Single character string, what to use for terminators.
      This defaults to the asterisk, "*".
    - to_stop - Boolean, defaults to False meaning do a full translation
      continuing on past any stop codons (translated as the specified
      stop_symbol). If True, translation is terminated at the first in
      frame stop codon (and the stop_symbol is not appended to the returned
      protein sequence).
    - cds - Boolean, indicates this is a complete CDS. If True, this checks
      the sequence starts with a valid alternative start codon (which will
      be translated as methionine, M), that the sequence length is a
      multiple of three, and that there is a single in frame stop codon at
      the end (this will be excluded from the protein sequence, regardless
      of the to_stop option). If these tests fail, an exception is raised.
    '''
    # A list literal in the signature would be one object shared by every
    # call; build the default here instead.
    if gap_characters is None:
        gap_characters = ['-']
    for s in remove_gaps(seq_iter, gap_characters=gap_characters):
        yield sequtils.get_translation(seq_record=s, **kwargs)
def test_extra_base(self):
    # 16 bases: five complete codons followed by a single leftover base.
    dna = Seq('ATGACCAACTGAATAA', IUPAC.unambiguous_dna)
    original = SeqRecord(dna, id='1')
    translated = sequtils.get_translation(original)
    self.assertSameMetadata(original, translated)
    # The result must be a distinct object, not the input record itself.
    self.assertIsNot(original, translated)
    # Only the five complete codons show up in the expected protein.
    self.assertEqual(str(translated.seq), 'MTN*I')
def test_to_stop(self):
    # Four codons, the last of which is a stop codon.
    dna = Seq('ATGACCAACTGA', IUPAC.unambiguous_dna)
    original = SeqRecord(dna, id='1')
    translated = sequtils.get_translation(original, to_stop=True)
    self.assertSameMetadata(original, translated)
    # The result must be a distinct object, not the input record itself.
    self.assertIsNot(original, translated)
    # With to_stop=True the expected protein carries no stop symbol.
    self.assertEqual(str(translated.seq), 'MTN')
def test_ambiguous_rna(self):
    # The second codon (AUR) contains the ambiguity code R.
    rna = Seq('AUGAURAACUGA', IUPAC.ambiguous_rna)
    original = SeqRecord(rna, id='1')
    translated = sequtils.get_translation(original)
    self.assertSameMetadata(original, translated)
    # The result must be a distinct object, not the input record itself.
    self.assertIsNot(original, translated)
    # The ambiguous codon is expected to come out as 'X'.
    self.assertEqual(str(translated.seq), 'MXN*')
def test_extra_base(self):
    # 16 bases: five complete codons followed by a single leftover base.
    s1 = SeqRecord(Seq('ATGACCAACTGAATAA', IUPAC.unambiguous_dna), id='1')
    s2 = sequtils.get_translation(s1)
    self.assertSameMetadata(s1, s2)
    # Check identity, not equality: the point is that a new record is
    # returned rather than the input object. An equality assertion is the
    # wrong tool here -- NOTE(review): Biopython's SeqRecord is understood
    # to deliberately leave ==/!= unimplemented (raising
    # NotImplementedError in later releases); confirm for the pinned
    # version.
    self.assertIsNot(s1, s2)
    # Only the five complete codons show up in the expected protein.
    self.assertEqual(str(s2.seq), 'MTN*I')
def test_to_stop(self):
    # Four codons, the last of which is a stop codon.
    s1 = SeqRecord(Seq('ATGACCAACTGA', IUPAC.unambiguous_dna), id='1')
    s2 = sequtils.get_translation(s1, to_stop=True)
    self.assertSameMetadata(s1, s2)
    # Check identity, not equality: the point is that a new record is
    # returned rather than the input object. An equality assertion is the
    # wrong tool here -- NOTE(review): Biopython's SeqRecord is understood
    # to deliberately leave ==/!= unimplemented (raising
    # NotImplementedError in later releases); confirm for the pinned
    # version.
    self.assertIsNot(s1, s2)
    # With to_stop=True the expected protein carries no stop symbol.
    self.assertEqual(str(s2.seq), 'MTN')
def test_ambiguous_rna(self):
    # The second codon (AUR) contains the ambiguity code R.
    s1 = SeqRecord(Seq('AUGAURAACUGA', IUPAC.ambiguous_rna), id='1')
    s2 = sequtils.get_translation(s1)
    self.assertSameMetadata(s1, s2)
    # Check identity, not equality: the point is that a new record is
    # returned rather than the input object. An equality assertion is the
    # wrong tool here -- NOTE(review): Biopython's SeqRecord is understood
    # to deliberately leave ==/!= unimplemented (raising
    # NotImplementedError in later releases); confirm for the pinned
    # version.
    self.assertIsNot(s1, s2)
    # The ambiguous codon is expected to come out as 'X'.
    self.assertEqual(str(s2.seq), 'MXN*')
def test_ambiguous_dna(self):
    # The second codon (ATR) contains the ambiguity code R.
    dna = Seq('ATGATRAACTGA', IUPAC.ambiguous_dna)
    original = SeqRecord(dna, id='1')
    translated = sequtils.get_translation(original)
    self.assertSameMetadata(original, translated)
    # The result must be a distinct object, not the input record itself.
    self.assertIsNot(original, translated)
    # The ambiguous codon is expected to come out as 'X'.
    self.assertEqual(str(translated.seq), 'MXN*')
def test_unambiguous_rna(self):
    # Four RNA codons, the last of which is a stop codon.
    rna = Seq('AUGACCAACUGA', IUPAC.unambiguous_rna)
    original = SeqRecord(rna, id='1')
    translated = sequtils.get_translation(original)
    self.assertSameMetadata(original, translated)
    # The result must be a distinct object, not the input record itself.
    self.assertIsNot(original, translated)
    # Without to_stop, the stop codon appears as '*' in the expected output.
    self.assertEqual(str(translated.seq), 'MTN*')