Exemplo n.º 1
0
 def test_sliceIsSequenceCopy(self):
     """Check that a slice is an independent Sequence, not a view.

     Deleting an element from the slice must leave the source Sequence
     unchanged.
     """
     prot = Sequence("ABCD")
     subProt = prot[1:3]
     self.assertEqual(subProt, Sequence("BC"))
     # Mutate the slice only; the original is re-checked on the last line.
     del subProt[0]
     self.assertEqual(subProt, Sequence("C"))
     self.assertEqual(prot, Sequence("ABCD"))
Exemplo n.º 2
0
    def __align(self, i, j):
        """
        Generator that walks the origin matrix backwards from row i and
        column j and yields one result per completed alignment path.

        Nothing more is produced once self._resultCount has dropped to 0.
        Raises ValueError on an origin marker other than "T", "D" or "L".
        """
        # The requested number of results has already been produced
        if self._resultCount == 0:
            return

        # Does the path end on this cell?
        #  - local      : a null score was reached
        #  - global     : the beginning of the matrix was reached
        #  - semiglobal : an edge of the matrix was reached
        mode = self._alignMode
        if mode == "local":
            pathComplete = self._alignMatrix[i][j] == 0
        elif mode == "global":
            pathComplete = i == 0 and j == 0
        elif mode == "semiglobal":
            pathComplete = i == 0 or j == 0
        else:
            pathComplete = False

        if not pathComplete:
            # Follow every recorded origin of this cell, undoing the step afterwards
            for origin in self._originMatrix[i][j]:
                self._currentAlignPath.append((i, j))

                if origin == "T":  # top: gap in the column sequence
                    colSymbol = "-"
                    rowSymbol = self._rowSeq[i - 1]
                    prevRow, prevCol = i - 1, j
                elif origin == "D":  # diagonal: both sequences advance
                    colSymbol = self._colSeq[j - 1]
                    rowSymbol = self._rowSeq[i - 1]
                    prevRow, prevCol = i - 1, j - 1
                elif origin == "L":  # left: gap in the row sequence
                    colSymbol = self._colSeq[j - 1]
                    rowSymbol = "-"
                    prevRow, prevCol = i, j - 1
                else:
                    raise ValueError("Origin must be T (top), D (diagonal) or L (left)")

                # The alignment is built back to front, hence the insertion at index 0
                self._alignedColSeq.insert(0, colSymbol)
                self._alignedRowSeq.insert(0, rowSymbol)
                yield from self.__align(prevRow, prevCol)

                # Backtrack before trying the next origin
                self._currentAlignPath.pop()
                del self._alignedColSeq[0]
                del self._alignedRowSeq[0]
            return

        # The suffix is repeated self._isSuboptimal times (none when it is False)
        suboptimalSuffix = "-suboptimal(" + str(self._subOptimalDepth) + ")"
        description = self._alignMode + suboptimalSuffix * self._isSuboptimal

        # For MSA the aligned column data is passed as is, otherwise it is
        # wrapped in a Sequence carrying the column description
        if self._isMultiple:
            colPart = self._alignedColSeq
            multipleFlag = True
        else:
            colPart = Sequence(self._alignedColSeq, self._colSeq.getDescription())
            multipleFlag = False
        rowPart = Sequence(self._alignedRowSeq, self._rowSeq.getDescription())

        result = Aligned(colPart, rowPart, j, i, description,
                         self._currentAlignScore, self._scoreMatrix, multipleFlag)

        # Keep the path of the first best alignment for the suboptimal lookup
        if self._bestAlignPath == []:
            self._bestAlignPath = deepcopy(self._currentAlignPath)

        self._resultCount -= 1
        yield result
Exemplo n.º 3
0
    def __initialize(self, seqA, seqB, iniGapPenalty, extGapPenalty):
        """
        Prepares every data structure used by the alignment, then fills the
        matrices and looks up the best scores.

        seqA is stored as the column sequence and seqB as the row sequence.
        When extGapPenalty is None, iniGapPenalty is used in its place.
        Raises ValueError if either sequence is empty.
        """
        # Sequences: both must hold at least one element
        if len(seqA) == 0 or len(seqB) == 0:
            raise ValueError("Sequences to align cannot be empty")
        self._colSeq, self._rowSeq = seqA, seqB

        # Matrices have one extra row and one extra column
        rowCount = len(self._rowSeq) + 1
        colCount = len(self._colSeq) + 1

        # Alignments
        self._isSuboptimal = False
        self._subOptimalDepth = 0

        self._alignedRowSeq = Sequence()
        # In MSA (align with PSSM) there is no column Sequence, a plain list is used
        self._alignedColSeq = [] if self._isMultiple else Sequence()

        self._maxAlignScore = 0  # Maximum score found while aligning
        self._maxScoreRows = []  # Rows of maximum score
        self._maxScoreCols = []  # Columns of maximum score

        self._currentAlignPath = []  # indexes of the current alignment
        self._bestAlignPath = []  # indexes of the alignment with the best score

        # Gap penalties
        self._iniGapPenalty = iniGapPenalty
        if extGapPenalty is None:
            self._extGapPenalty = iniGapPenalty
        else:
            self._extGapPenalty = extGapPenalty

        # Matrices: three independent zero-filled score matrices plus the origins
        self._alignMatrix = [[0] * colCount for _ in range(rowCount)]
        self._rowGapMatrix = [[0] * colCount for _ in range(rowCount)]
        self._colGapMatrix = [[0] * colCount for _ in range(rowCount)]

        self._originMatrix = [[""] * colCount for _ in range(rowCount)]

        # Global alignment: first line and column get initial scores and origins
        if self._alignMode == "global":
            self.__initAlignValues()

        # Fill all matrices, row by row, skipping the first row and column
        for row in range(1, rowCount):
            for col in range(1, colCount):
                self.__fill(row, col)

        # Find best scores
        self.__findBestScore()
Exemplo n.º 4
0
 def test_setNameMode(self):
     """Check how the name mode changes the length of str(Sequence).

     For a one-element Sequence the test expects 1 character by default,
     3 characters in "medium" mode and more than 3 in "long" mode.
     """
     prot = Sequence("A")
     # assertEqual / assertGreater report the actual length on failure,
     # unlike assertTrue on a comparison.
     self.assertEqual(len(str(prot)), 1)
     prot.setNameMode("medium")
     self.assertEqual(len(str(prot)), 3)
     prot.setNameMode("long")
     self.assertGreater(len(str(prot)), 3)
Exemplo n.º 5
0
    def __init__(self, path="", description="", ignore=None):
        """
        Creates a Score object.
        If 'path' is provided, loads the Score values from an iij file.
        Otherwise, creates a Score for all possible AminoAcids with values 0.

        In the iij file, blank lines and lines whose first non-blank character
        is '#' are skipped. The first remaining line lists the amino acids
        (their order defines the matrix indexes); every following line is one
        row of integer scores.
        'ignore' lists the AminoAcids left out of the zero-filled matrix; it is
        not consulted when loading from a file.
        """
        self._description = description
        self._ignore = Sequence(ignore)
        self._matrix = []
        self._aaOrder = {}
        self._aaSequence = Sequence()

        # If path is provided, load directly from iij file
        if path != "":
            with open(path, 'r') as iijFile:
                foundAAOrder = False  # Have we found the line with the amino acid values and order yet?
                for line in iijFile:
                    fields = line.split()

                    # Skip blank lines (they would otherwise be taken for an
                    # empty header or an empty matrix row) and comments
                    if not fields or fields[0].startswith("#"):
                        continue

                    if not foundAAOrder:  # Read aa values and order
                        for aa in fields:
                            self._aaSequence.extend(aa)
                        self._aaOrder = {
                            aa: index
                            for index, aa in enumerate(self._aaSequence)
                        }
                        foundAAOrder = True
                    else:  # Read matrix values
                        self._matrix.append([int(v) for v in fields])

        # Otherwise initialize matrix with 0
        else:
            # Each new row is one entry longer than the previous one
            lineSize = 1
            for aa in AminoAcid.getAllNames():
                if AminoAcid(aa) not in self._ignore:
                    self._aaSequence.extend(aa)
                    self._aaOrder[self._aaSequence[-1]] = lineSize - 1
                    self._matrix.append([0] * lineSize)
                    lineSize += 1
Exemplo n.º 6
0
def getSequencesFromFasta(path):
    """
    Loads the FASTA file located in 'path' and yields the Sequences it contains.

    A line starting with '>' opens a new Sequence whose description is the
    rest of that line; the following lines are appended to that Sequence.
    Blank lines are ignored. A file that is empty or holds only blank lines
    yields nothing.
    Raises ValueError if sequence data appears before the first '>' header.
    """
    with open(path, 'r') as fastaFile:
        newProtein = None
        for line in fastaFile:
            line_s = line.strip()
            if line_s == "":
                continue

            if line_s[0] == ">":
                # A new header closes the record read so far
                if newProtein is not None:
                    yield newProtein
                newProtein = Sequence(None, line_s[1:])
            elif newProtein is None:
                raise ValueError("FASTA sequence data found before the first '>' header")
            else:
                newProtein.extend(line_s)

        # The last record is only yielded when it holds at least one element;
        # newProtein is still None when the file contained no header at all
        if newProtein is not None and len(newProtein) > 0:
            yield newProtein
Exemplo n.º 7
0
 def test_differentValuesNotEqual(self):
     """Sequences holding the same elements in a different order must differ."""
     forward = Sequence("AB")
     backward = Sequence("BA")
     self.assertNotEqual(forward, backward)
Exemplo n.º 8
0
 def test_getDescription_staysAfterCopy(self):
     """A Sequence built from another Sequence must keep its description."""
     description = "this is a description!"
     prot = Sequence("A", description)
     prot2 = Sequence(prot)
     self.assertEqual(prot2.getDescription(), description)
Exemplo n.º 9
0
 def test_differentNamesEqual(self):
     """A Sequence built from a list of names equals the one built from "MOCK".

     The list mixes capitalised, lower-case and upper-case full names with a
     one-letter code.
     """
     prot1 = Sequence(["Methionine", "pyrrolysine", "CYSTEINE", "K"])
     prot2 = Sequence("MOCK")
     self.assertEqual(prot1, prot2)
Exemplo n.º 10
0
 def test_copyEqualsOriginal(self):
     """A Sequence constructed from another Sequence must compare equal to it."""
     prot1 = Sequence("methionine")
     prot2 = Sequence(prot1)
     self.assertEqual(prot1, prot2)
Exemplo n.º 11
0
 def test_count(self):
     """count() must report both occurrences of AminoAcid("A") in "ABCAX"."""
     prot = Sequence("ABCAX")
     self.assertEqual(prot.count(AminoAcid("A")), 2)
Exemplo n.º 12
0
 def test_remove(self):
     """remove() must drop the AminoAcid("A") element from "TRANKIL"."""
     prot = Sequence("TRANKIL")
     prot.remove(AminoAcid("A"))
     self.assertEqual(prot, Sequence("TRNKIL"))
Exemplo n.º 13
0
 def test_extend(self):
     """extend() must append both a Sequence argument and a string argument."""
     prot = Sequence("OK")
     prot.extend(Sequence("L"))
     prot.extend("M")
     self.assertEqual(prot, Sequence("OKLM"))
Exemplo n.º 14
0
 def test_insert(self):
     """insert(0, ...) must place the new element at the front."""
     prot = Sequence("KL")
     prot.insert(0, "M")
     self.assertEqual(prot, Sequence("MKL"))
Exemplo n.º 15
0
 def test_setSeparator(self):
     """After setSeparator("~"), str() of a two-element Sequence splits in two on "~"."""
     prot = Sequence("AE")
     prot.setSeparator("~")
     self.assertEqual(len(str(prot).split("~")), 2)
Exemplo n.º 16
0
 def test_sliceSingleItemIsAminoAcid(self):
     """Indexing with a single integer must give an AminoAcid."""
     prot = Sequence("ABC")
     aa = prot[0]
     self.assertEqual(aa, AminoAcid("A"))
Exemplo n.º 17
0
 def test_sliceMultipleItemsIsSequence(self):
     """Indexing with a slice must give a Sequence of the selected elements."""
     prot = Sequence("ABC")
     seq = prot[0:2]
     self.assertEqual(seq, Sequence("AB"))
Exemplo n.º 18
0
 def test_delete(self):
     """del on an index must remove that single element."""
     prot = Sequence("YEP")
     del prot[1]
     self.assertEqual(prot, Sequence("YP"))