def testEmptyAlignment(self):
    # Align 'AAAA' against 'BBBB' with the fixture's algorithm; the
    # resulting alignment is expected to report itself as empty.
    protein = SequenceTypes.Protein
    query = RichSequence('s1', '', 'AAAA', protein)
    subject = RichSequence('s2', '', 'BBBB', protein)

    alignment = self.algorithm.align(query, subject)

    self.assertTrue(alignment.is_empty)
def setUp(self):
    """
    Prepare the fixture: two protein sequences and a
    L{LocalAlignmentAlgorithm} configured with IdentityMatrix(1, -1)
    scoring and a gap value of -1.
    """
    super(TestLocalAlignmentAlgorithm, self).setUp()

    protein = SequenceTypes.Protein
    matrix = IdentityMatrix(1, -1)

    self.seq1 = RichSequence(
        's1', '', 'CCABBBCBBCABAABCCEAAAAAAAAAAAAFAA', protein)
    # NOTE(review): seq2 reuses the id 's1' -- confirm this is intended
    self.seq2 = RichSequence('s1', '', 'AZCBBABAACBCCEF', protein)

    self.algorithm = LocalAlignmentAlgorithm(scoring=matrix, gap=-1)
def setUp(self):
    """
    Prepare the fixture: an L{AlignmentResult} built from two
    four-character protein sequences, and a second one built from a
    sequence with no characters at all.
    """
    super(TestAlignmentResult, self).setUp()

    protein = SequenceTypes.Protein

    # populated result: score 5.5, positions 10, 12, 20, 22
    self.seq1 = RichSequence('s1', '', 'AB-D', protein)
    self.seq2 = RichSequence('s2', '', 'A-CD', protein)
    self.ali = AlignmentResult(
        5.5, self.seq1, self.seq2, 10, 12, 20, 22)

    # result made of the same zero-length sequence on both sides
    self.es = RichSequence('s1', '', '')
    self.empty = AlignmentResult(0, self.es, self.es, 0, 0, 0, 0)
def _traceback(self, m, seq1, seq2):
    """
    Trace back and return the optimal alignment.

    Starting at the cell returned by C{self._terminus(m)}, the matrix is
    walked backwards for as long as C{self._expandable(m, i, j)} holds.
    At each step the move that explains the current cell value is chosen
    (diagonal first, then a step in C{i}, then a step in C{j}) and the
    corresponding residues, or a gap placeholder, are collected.

    @param m: scoring matrix, indexed as C{m[i][j]}, where C{i} follows
              C{seq1} and C{j} follows C{seq2}
    @param seq1: query sequence; must provide C{residues}, C{alphabet},
                 C{id}, C{header} and C{type}
    @param seq2: subject sequence; same requirements as C{seq1}

    @return: alignment holding the score of the terminus cell, both
             aligned sequences and their start/end positions
    @rtype: L{AlignmentResult}

    @raise AssertionError: if the walk reaches a cell which none of the
                           three moves can explain
    """
    query = []
    subject = []

    # working with string sequences results in a massive speed-up;
    # both lists are padded with a placeholder at index 0, so residue
    # positions start at 1
    qseq = ["*"] + self._sequence(seq1)
    sseq = ["*"] + self._sequence(seq2)

    i, j = self._terminus(m)
    qstart, start = i, j
    qend, end = i, j
    score = m[i][j]

    while self._expandable(m, i, j):

        if i > 0 and j > 0 and m[i][j] == (m[i - 1][j - 1] + self._score(qseq[i], sseq[j])):
            # diagonal move: both residues are aligned to each other
            query.append(seq1.residues[i])
            subject.append(seq2.residues[j])
            qstart, start = i, j
            i, j = i - 1, j - 1

        elif i > 0 and m[i][j] == (m[i - 1][j] + self._gap):
            # step in i: query residue against a gap in the subject
            query.append(seq1.residues[i])
            subject.append(ResidueInfo(-1, seq2.alphabet.GAP))
            qstart = i
            i = i - 1

        elif j > 0 and m[i][j] == (m[i][j - 1] + self._gap):
            # step in j: subject residue against a gap in the query
            query.append(ResidueInfo(-1, seq1.alphabet.GAP))
            subject.append(seq2.residues[j])
            start = j
            j = j - 1

        else:
            # Raised explicitly instead of "assert False": assert
            # statements are removed under "python -O", and since
            # neither i nor j changes in this branch, the loop would
            # then never terminate.
            raise AssertionError(
                "Traceback stalled at cell ({0}, {1}): "
                "no move explains its value".format(i, j))

    # residues were collected from the end towards the start
    query.reverse()
    subject.reverse()

    aligned_query = RichSequence(seq1.id, seq1.header, query, seq1.type)
    aligned_subject = RichSequence(seq2.id, seq2.header, subject, seq2.type)

    return AlignmentResult(score, aligned_query, aligned_subject, qstart, qend, start, end)
def read_a3m(self, string):
    """
    Read an A3M-formatted alignment from a string.

    The entries are first parsed as plain sequences, then expanded
    column by column with L{A3MSequenceIterator} (using the alphabet's
    insertion character) and finally rebuilt as L{RichSequence} objects.

    @param string: alignment string
    @type string: str

    @rtype: L{A3MAlignment}
    """
    alphabet = SequenceAlphabets.get(self.product_type)
    insertion = str(alphabet.INSERTION)

    # parse all "mis-aligned" sequences as case-sensitive strings
    raw_parser = SequenceParser(Sequence, self.product_type)
    sequences = raw_parser.parse_string(string)

    # one growing character list per parsed sequence
    expanded = [[] for _ in sequences]

    # expand all sequences with insertion characters and make them
    # equal length: every column contributes one character per sequence
    for column in A3MSequenceIterator(sequences, insertion):
        for index, char in enumerate(column):
            expanded[index].append(char)

    # build normal sequence objects from the equalized sequence strings
    aligned_seqs = [
        RichSequence(entry.id, entry.header, chars, self.product_type)
        for entry, chars in zip(sequences, expanded)
    ]

    return A3MAlignment(aligned_seqs, strict=self.strict)
def testIsEmpty(self):
    # the fixture alignment must not report itself as empty
    self.assertFalse(self.ali.is_empty)

    # a result built from a sequence with no characters must
    blank_seq = RichSequence('s1', '', '')
    blank_result = AlignmentResult(0, blank_seq, blank_seq, 0, 0, 0, 0)
    self.assertTrue(blank_result.is_empty)
def _build(self, string):
    """
    Build a protein L{RichSequence} from the given string, using the
    string's hash as sequence ID, convert it to a L{Chain} and pass
    that chain to L{SparseChainSequence.create}.

    @param string: sequence string

    @return: the result of L{SparseChainSequence.create}
    """
    sequence = RichSequence(
        str(hash(string)), "", string, SequenceTypes.Protein)
    chain = Chain.from_sequence(sequence)

    return SparseChainSequence.create(chain)