def testORF1abPolyprotein(self):
     """
     When the name 'ORF1ab polyprotein' is given, translation must read up
     to and including the slippery sequence and then resume from the last
     nucleotide of the slippery sequence (so that nucleotide is read twice).
     """
     lysineCount = 15000 // 3
     slippery = 'TTTAAAC'
     nt = ''.join(('AA', 'AAA' * lysineCount, slippery, 'CCCTAAAA'))
     # With the final slippery nucleotide (C) read a second time, what is
     # actually translated is:
     #   AA, then AAA x lysineCount, then TTTAAAC C CCCTAAAA
     # Read as codons from the start, this is:
     #   AAA x lysineCount, then AAT TTA AAC CCC CTA AAA
     #   K   x lysineCount, then  N   L   N   P   L   K
     aa = 'K' * lysineCount + 'NLNPLK'
     self.assertEqual(aa, translate(nt, 'ORF1ab polyprotein'))
 def testAAATTT(self):
     """
     Translating the two codons AAA and TTT must give the amino acids KF.
     """
     aa = translate('AAATTT')
     self.assertEqual('KF', aa)
 def testNameWithAAA(self):
     """
     Passing a name that is not 'ORF1ab polyprotein' must still result in
     an AAA codon being translated to a Lysine (K).
     """
     aa = translate('AAA', 'name')
     self.assertEqual('K', aa)
 def testAAA(self):
     """
     With no name given, an AAA codon must be translated to a Lysine (K).
     """
     aa = translate('AAA')
     self.assertEqual('K', aa)
 def testIncomplete(self):
     """
     A nucleotide sequence that is too short to form a full codon must be
     translated to an X.
     """
     aa = translate('AA')
     self.assertEqual('X', aa)
 def testEmpty(self):
     """
     Translating an empty nucleotide sequence must give an empty amino
     acid sequence.
     """
     aa = translate('')
     self.assertEqual('', aa)
示例#7
0
    def aaSequences(self, featureName):
        """
        Match the genome and the reference at the amino acid level.

        The result is stored in C{self._cache['aa']} under the feature name,
        so a later call for the same feature returns the stored pair.

        @param featureName: A C{str} feature name.
        @raise TranslationError: or one of its sub-classes (see translate.py)
            if a feature nucleotide sequence cannot be translated.
        @raise TranslatedReferenceAndGenomeLengthError: If the surface
            glycoprotein amino acid sequences of the reference and the
            genome have different lengths.
        @return: A 2-C{tuple} of C{dark.reads.AARead} instances, holding
            the amino acids for the feature as located in the reference
            genome and then the corresponding amino acids from the genome being
            examined.
        """
        try:
            return self._cache['aa'][featureName]
        except KeyError:
            pass

        referenceNt, genomeNt = self.ntSequences(featureName)

        # Internal sanity check: the two nucleotide sequences must have the
        # same length.
        assert len(referenceNt) == len(genomeNt)

        feature = self.features[featureName]
        name = feature['name']
        referenceId = self.features.reference.id + f' ({name})'
        genomeId = self.genome.id + f' ({name})'

        # The unaligned translations are only produced on the MAFFT path.
        # They stay None on the surface glycoprotein path so that the debug
        # output below never refers to unbound names.
        referenceAa = genomeAa = None

        gapCount = genomeNt.sequence.count('-')
        if (name == 'surface glycoprotein' and gapCount > 0
                and gapCount % 3 == 0):
            # The genome has gaps and their number is a multiple of three:
            # translate both nucleotide sequences (gaps included) with
            # translateSpike instead of aligning the amino acids with MAFFT.
            referenceAaAligned = AARead(
                referenceId, translateSpike(referenceNt.sequence))
            genomeAaAligned = AARead(
                genomeId, translateSpike(genomeNt.sequence))

            if len(referenceAaAligned) != len(genomeAaAligned):
                raise TranslatedReferenceAndGenomeLengthError(
                    'Genome and reference AA sequences have different lengths.'
                )
        else:
            # Prefer a translation stored with the feature and only call
            # translate() when there is none. (Giving the translate() call
            # as the default argument of dict.get would always evaluate it,
            # even when the stored translation is used.)
            referenceTranslation = feature.get('translation')
            if referenceTranslation is None:
                referenceTranslation = translate(feature['sequence'], name)
            referenceAa = AARead(referenceId, referenceTranslation)

            # The gaps are removed from the genome before translating; the
            # amino acid sequences are then aligned by MAFFT.
            genomeAa = AARead(
                genomeId,
                translate(genomeNt.sequence.replace('-', ''), name))

            referenceAaAligned, genomeAaAligned = mafft(
                Reads([referenceAa, genomeAa]), options=MAFFT_OPTIONS)

        if DEBUG:
            print(f'AA MATCH {name}:')

            print(f'ref nt aligned {len(referenceNt.sequence)}:',
                  referenceNt.sequence[SLICE])
            print(f'gen nt aligned {len(genomeNt.sequence)}:',
                  genomeNt.sequence[SLICE])

            # There are no unaligned amino acid reads to show when the
            # surface glycoprotein path was taken.
            if referenceAa is not None:
                print(f'ref aa        {len(referenceAa.sequence)}:',
                      referenceAa.sequence[SLICE])
            if genomeAa is not None:
                print(f'gen aa        {len(genomeAa.sequence)}:',
                      genomeAa.sequence[SLICE])

            print(f'ref aa aligned {len(referenceAaAligned.sequence)}:',
                  referenceAaAligned.sequence[SLICE])
            print(f'gen aa aligned {len(genomeAaAligned.sequence)}:',
                  genomeAaAligned.sequence[SLICE])

        self._cache['aa'][featureName] = referenceAaAligned, genomeAaAligned
        return referenceAaAligned, genomeAaAligned