示例#1
0
    def testEmptyAlignment(self):
        """
        Align 'AAAA' against 'BBBB' (no character in common) and expect the
        resulting alignment to report itself as empty.
        """
        protein = SequenceTypes.Protein
        query = RichSequence('s1', '', 'AAAA', protein)
        subject = RichSequence('s2', '', 'BBBB', protein)

        result = self.algorithm.align(query, subject)

        self.assertTrue(result.is_empty)
示例#2
0
    def setUp(self):
        """
        Run the parent fixture setup, then install two protein sequences and
        a local alignment algorithm configured with IdentityMatrix(1, -1)
        scoring and gap=-1.
        """
        super(TestLocalAlignmentAlgorithm, self).setUp()

        protein = SequenceTypes.Protein

        # NOTE(review): both fixtures carry the id 's1' -- confirm this is
        # intentional and not a typo for 's2'
        self.seq1 = RichSequence(
            's1', '', 'CCABBBCBBCABAABCCEAAAAAAAAAAAAFAA', protein)
        self.seq2 = RichSequence('s1', '', 'AZCBBABAACBCCEF', protein)

        identity = IdentityMatrix(1, -1)
        self.algorithm = LocalAlignmentAlgorithm(scoring=identity, gap=-1)
示例#3
0
    def setUp(self):
        """
        Run the parent fixture setup, then create the objects shared by the
        tests: two protein sequences, an alignment result built from them
        (C{self.ali}), an empty sequence (C{self.es}) and an alignment result
        built from that empty sequence paired with itself (C{self.empty}).
        """
        super(TestAlignmentResult, self).setUp()

        protein = SequenceTypes.Protein

        self.seq1 = query = RichSequence('s1', '', 'AB-D', protein)
        self.seq2 = subject = RichSequence('s2', '', 'A-CD', protein)
        self.ali = AlignmentResult(5.5, query, subject, 10, 12, 20, 22)

        self.es = blank = RichSequence('s1', '', '')
        self.empty = AlignmentResult(0, blank, blank, 0, 0, 0, 0)
    def _traceback(self, m, seq1, seq2):
        """
        Trace back through the scoring matrix and return the optimal
        alignment.

        The walk starts at the cell returned by C{self._terminus(m)} and
        continues for as long as C{self._expandable(m, i, j)} holds. Each
        visited cell is explained by the first matching move, tried in this
        order: diagonal (both residues are emitted), vertical (query residue
        against a gap in the subject), horizontal (gap in the query against
        a subject residue).

        @param m: scoring matrix, indexed as C{m[i][j]}
        @param seq1: query sequence
        @param seq2: subject sequence

        @return: alignment holding the score of the starting cell, the two
                 aligned sequences and the first/last matrix indices visited
                 along each sequence
        @rtype: L{AlignmentResult}

        @raise AssertionError: if a cell cannot be derived from any of its
                               neighbours, i.e. the matrix is inconsistent
                               with the scoring scheme
        """

        query = []
        subject = []

        # working with string sequences results in a massive speed-up;
        # the "*" placeholder at index 0 makes qseq/sseq 1-based, so they
        # can be addressed with the same i/j as the matrix
        qseq = ["*"] + self._sequence(seq1)
        sseq = ["*"] + self._sequence(seq2)

        i, j = self._terminus(m)
        qstart, start = i, j
        qend, end = i, j
        score = m[i][j]

        while self._expandable(m, i, j):

            if i > 0 and j > 0 and m[i][j] == (m[i - 1][j - 1] +
                                               self._score(qseq[i], sseq[j])):
                # diagonal move: residue i of the query pairs with residue j
                # of the subject
                # NOTE(review): residues is addressed with the matrix index,
                # so it is presumably a 1-based indexer -- confirm
                query.append(seq1.residues[i])
                subject.append(seq2.residues[j])
                qstart, start = i, j
                i, j = i - 1, j - 1

            elif i > 0 and m[i][j] == (m[i - 1][j] + self._gap):
                # vertical move: query residue against a gap in the subject
                query.append(seq1.residues[i])
                subject.append(ResidueInfo(-1, seq2.alphabet.GAP))
                qstart = i
                i = i - 1

            elif j > 0 and m[i][j] == (m[i][j - 1] + self._gap):
                # horizontal move: gap in the query against a subject residue
                query.append(ResidueInfo(-1, seq1.alphabet.GAP))
                subject.append(seq2.residues[j])
                start = j
                j = j - 1

            else:
                # raise explicitly instead of "assert False": assert
                # statements are removed under "python -O", which would leave
                # i and j untouched and make this loop spin forever
                raise AssertionError(
                    "Traceback failed: cell ({0}, {1}) cannot be derived "
                    "from any of its neighbours".format(i, j))

        # residues were collected from the end of the alignment backwards
        query.reverse()
        subject.reverse()

        aligned_query = RichSequence(seq1.id, seq1.header, query, seq1.type)
        aligned_subject = RichSequence(seq2.id, seq2.header, subject,
                                       seq2.type)

        return AlignmentResult(score, aligned_query, aligned_subject, qstart,
                               qend, start, end)
    def read_a3m(self, string):
        """
        Parse an alignment in A3M format.

        The input is first parsed into plain sequences, which are then
        expanded column by column (using the alphabet's insertion character)
        and finally wrapped into rich sequence objects.

        @param string: alignment string
        @type string: str

        @rtype: L{A3MAlignment}
        """
        alphabet = SequenceAlphabets.get(self.product_type)

        # first pass: read the raw ("mis-aligned") entries as plain,
        # case-sensitive sequences
        reader = SequenceParser(Sequence, self.product_type)
        entries = reader.parse_string(string)

        # one growing list of characters per parsed entry
        expanded = [[] for _ in entries]

        # walk the alignment column by column; each column contributes one
        # character to every entry, padding with the insertion character so
        # that all entries end up with the same length
        insertion = str(alphabet.INSERTION)
        for column in A3MSequenceIterator(entries, insertion):
            for row, char in enumerate(column):
                expanded[row].append(char)

        # wrap the equalized character lists into regular sequence objects
        aligned = [
            RichSequence(entry.id, entry.header, chars, self.product_type)
            for entry, chars in zip(entries, expanded)
        ]

        return A3MAlignment(aligned, strict=self.strict)
示例#6
0
    def testIsEmpty(self):
        """
        C{is_empty} must be false for the C{self.ali} fixture and true for
        an alignment result built from an empty sequence paired with itself.
        """
        self.assertFalse(self.ali.is_empty)

        blank = RichSequence('s1', '', '')
        no_alignment = AlignmentResult(0, blank, blank, 0, 0, 0, 0)

        self.assertTrue(no_alignment.is_empty)
示例#7
0
 def _build(self, string):
     """
     Turn a sequence string into a sparse chain sequence.

     The string is wrapped in a protein L{RichSequence} whose id is the
     text form of the string's hash, converted to a chain with
     C{Chain.from_sequence} and passed to C{SparseChainSequence.create}.

     @param string: sequence string

     @return: whatever C{SparseChainSequence.create} builds from the chain
     """
     seq_id = str(hash(string))
     protein_seq = RichSequence(seq_id, "", string, SequenceTypes.Protein)
     chain = Chain.from_sequence(protein_seq)

     return SparseChainSequence.create(chain)