示例#1
0
文件: cro.py 项目: sha443/CRO_RNA
    def FindMinimumStructure(self, mole, minEnrg, minEnrgIndex, path, fileName,
                             sequence):
        """Build the structure string for one molecule and score it.

        The stems listed in ``mole.moleculeTable[minEnrgIndex]`` are looked
        up in ``mole.infoTable`` and processed in three passes:

        1. regular stems, written as ``(`` / ``)`` (at least 3 pairs);
        2. H-type pseudoknot stems, written as ``[`` / ``]`` (at least 2
           pairs, loop lengths accepted by ``pk.LoopsFulfill``);
        3. a second pseudoknot scan over the positions that are still
           free, written as ``{`` / ``}``.

        The resulting string is stored in ``self.structureFound``, its
        energy is evaluated, and it is compared with the benchmark read
        from ``path + "benchmark/" + fileName``.

        Args:
            mole: object exposing ``sequence``, ``moleculeTable`` and
                ``infoTable``; each ``infoTable`` entry unpacks to
                ``(start, end, length)``.
            minEnrg: not used by this method; kept for interface
                compatibility.
            minEnrgIndex: index into ``mole.moleculeTable`` of the molecule
                to decode.
            path: prefix that is concatenated with ``"benchmark/"`` and
                ``fileName`` (so it needs its own trailing separator).
            fileName: name of the benchmark file.
            sequence: passed through to ``energy.Turner04Handlar`` and
                ``pk.PseudoknotHandler``.

        Returns:
            The tuple ``(sen, sp, f_m, tp, fp, fn, structure, totalEnergy)``
            where the first six values are whatever ``func.Performance``
            returns (presumably sensitivity, specificity, F-measure and the
            TP/FP/FN counts -- confirm against ``func``), ``structure`` is
            ``self.structureFound`` and ``totalEnergy`` is the stem energy
            plus the pseudoknot energy.
        """
        size = len(mole.sequence)
        # flag[p]: 0 = free, 2 / 3 = tentatively paired (awaiting the
        # minimum-length check), 1 = committed to the structure.
        flag = [0] * size
        # flagValid[p]: bracket code of a tentative pairing:
        # 1 "(", 2 ")", 3 "[", 4 "]", 5 "{", 6 "}".
        flagValid = [0] * size
        mol = ["."] * size

        scElements = []  # committed regular stems: [start, end, length]
        pkElements = []  # committed pseudoknots, see the appends below
        elElements = []  # "[" "]" stems whose energy is subtracted later

        # Fetch the stems of the selected molecule, longest first.
        tempInfo = sorted(
            (mole.infoTable[i] for i in mole.moleculeTable[minEnrgIndex]),
            key=lambda x: x[2],
            reverse=True)

        # ------------------------------------------------------
        # Pass 1: regular secondary-structure stems
        # ------------------------------------------------------
        for start, end, length in tempInfo:
            # Tentatively pair every position of the stem that is free.
            for j, k in zip(range(start, start + length), range(end, 0, -1)):
                if flag[j] == 0 and flag[k] == 0:
                    flag[j] = 2
                    flag[k] = 2
                    flagValid[j] = 1  # (
                    flagValid[k] = 2  # )

            # Keep only runs of 3 or more consecutive tentative pairs.
            for j, k in zip(range(start, start + length), range(end, 0, -1)):
                if not (flag[j] == 2 and flag[k] == 2
                        and CRO().Equal12(flagValid, j, k)):
                    continue

                f, t = j, k  # first pair of the run
                stem = 0
                # j and k are advanced locally; the enclosing for-loop
                # rebinds them on its next iteration.
                while (flag[j] == 2 and flag[k] == 2
                       and CRO().Equal12(flagValid, j, k) and j <= k):
                    stem += 1
                    j += 1
                    k -= 1

                if 0 < stem < 3:
                    # Too short: release the tentative pairs again.
                    for x, y in zip(range(f, f + stem),
                                    range(t, t - stem, -1)):
                        flag[x] = 0
                        flag[y] = 0
                        flagValid[x] = 0
                        flagValid[y] = 0
                else:
                    # NOTE: a zero-length run (j > k on entry) also ends up
                    # here, exactly as in the original logic.
                    scElements.append([f, t, stem])  # start, end, length
                    for x, y in zip(range(f, f + stem), range(t, 0, -1)):
                        flag[x] = 1
                        flag[y] = 1
                        mol[x] = "("
                        mol[y] = ")"

        # ------------------------------------------------------
        # Pass 2: H-type pseudoknots
        # ------------------------------------------------------
        mol2 = mol[:]  # pseudoknot brackets go into a copy
        for i, j, len1 in tempInfo:
            for k, l, len2 in tempInfo:
                # Crossing condition for an H-type pseudoknot.
                if not (i < k and k < j and j < l):
                    continue

                # Loop lengths used for the feasibility / energy checks.
                l1 = k - (i + len1)
                l2 = (j - len1 + 1) - (k + len2)
                l3 = (l - len2) - j
                if not pk.LoopsFulfill(l1, l2, l3):
                    continue

                # Tentatively pair every free position of the second stem.
                for u, v in zip(range(k, k + len2), range(l, 0, -1)):
                    if flag[u] == 0 and flag[v] == 0:
                        flag[u] = 2
                        flag[v] = 2
                        flagValid[u] = 3  # [
                        flagValid[v] = 4  # ]

                # Keep only runs of 2 or more consecutive tentative pairs.
                for u, v in zip(range(k, k + len2), range(l, 0, -1)):
                    if not (flag[u] == 2 and flag[v] == 2
                            and CRO().Equal34(flagValid, u, v)):
                        continue

                    f, t = u, v  # first pair of the run
                    stem = 0
                    # Reset for every candidate so the branch below never
                    # reads an unbound or stale value when the loop body
                    # does not execute.
                    stillValid = False
                    while (flag[u] == 2 and flag[v] == 2
                           and CRO().Equal34(flagValid, u, v)
                           and u <= v):
                        # Would the loops still be acceptable if the stem
                        # grew by one more pair?
                        l1 = f - (i + len1)
                        l2 = (j - len1 + 1) - (f + stem + 1)
                        l3 = (t - stem - 1) - j
                        stillValid = pk.LoopsFulfill(l1, l2, l3)
                        if not stillValid:
                            break
                        stem += 1
                        u += 1
                        v -= 1

                    if 0 < stem < 2:
                        # Too short: release the tentative pairs again.
                        for x, y in zip(range(f, f + stem),
                                        range(t, t - stem, -1)):
                            flag[x] = 0
                            flag[y] = 0
                            flagValid[x] = 0
                            flagValid[y] = 0
                    elif stillValid:
                        pkElements.append(
                            [i, j, f, t, len1, stem, l1, l2, l3])
                        elElements.append([f, t, stem])
                        for x, y in zip(range(f, f + stem),
                                        range(t, 0, -1)):
                            flag[x] = 1
                            flag[y] = 1
                            mol2[x] = "["
                            mol2[y] = "]"
                    # NOTE: when the loop check fails after 2 or more
                    # pairs, the run is neither committed nor released;
                    # its positions keep the tentative marker.

        # ------------------------------------------------------
        # Pass 3: recursive pseudoknots
        # ------------------------------------------------------
        mol4 = mol2[:]  # second-level brackets go into another copy
        for i, j, len1 in tempInfo:
            for k, l, len2 in tempInfo:
                # Crossing condition for an H-type pseudoknot.
                if not (i < k and k < j and j < l):
                    continue

                # Loop lengths used for the feasibility / energy checks.
                l1 = k - (i + len1)
                l2 = (j - len1 + 1) - (k + len2)
                l3 = (l - len2) - j
                if not pk.LoopsFulfill(l1, l2, l3):
                    continue

                # Tentatively pair every position that is still free; this
                # pass uses marker 3 to tell its pairs apart from pass 2.
                for u, v in zip(range(k, k + len2), range(l, 0, -1)):
                    if flag[u] == 0 and flag[v] == 0:
                        flag[u] = 3
                        flag[v] = 3
                        flagValid[u] = 5  # {
                        flagValid[v] = 6  # }

                # Keep only runs of 2 or more consecutive tentative pairs.
                for u, v in zip(range(k, k + len2), range(l, 0, -1)):
                    if not (flag[u] == 3 and flag[v] == 3):
                        continue

                    f, t = u, v  # first pair of the run
                    stem = 0
                    # Same guard as in pass 2 against an unbound / stale
                    # value.
                    stillValid = False
                    while flag[u] == 3 and flag[v] == 3 and u <= v:
                        # Would the loops still be acceptable if the stem
                        # grew by one more pair?
                        l1 = f - (i + len1)
                        l2 = (j - len1 + 1) - (f + stem + 1)
                        l3 = (t - stem - 1) - j
                        stillValid = pk.LoopsFulfill(l1, l2, l3)
                        if not stillValid:
                            break
                        stem += 1
                        u += 1
                        v -= 1

                    if 0 < stem < 2:
                        # Too short: release the tentative pairs again.
                        for x, y in zip(range(f, f + stem),
                                        range(t, t - stem, -1)):
                            flag[x] = 0
                            flag[y] = 0
                            flagValid[x] = 0
                            flagValid[y] = 0
                    elif stillValid:
                        pkElements.append(
                            [i, j, f, t, len1, stem, l1, l2, l3])
                        for x, y in zip(range(f, f + stem),
                                        range(t, 0, -1)):
                            flag[x] = 1
                            flag[y] = 1
                            mol4[x] = "{"
                            mol4[y] = "}"

        # ------------------------------------------------------
        # Energy evaluation
        # ------------------------------------------------------
        turnerEnergy = 0
        for stem in scElements:
            turnerEnergy += energy.Turner04Handlar(stem, sequence)

        pkEnergy = 0
        if pkElements:
            pkEnergy = pk.PseudoknotHandler(scElements, pkElements, sequence)

        # Subtract the energy of every "[" "]" stem recorded in pass 2.
        for stem in elElements:
            turnerEnergy -= energy.Turner04Handlar(stem, sequence)

        totalEnergy = turnerEnergy + pkEnergy

        self.structureFound = population.PrintableMolecule(mol4)

        # Read the reference structure; the context manager closes the file
        # even if reading fails.
        with open(path + "benchmark/" + fileName, "r") as benchmarkFile:
            benchmark = benchmarkFile.read()

        sen, sp, f_m, tp, fp, fn = func.Performance(self.structureFound,
                                                    benchmark)
        return sen, sp, f_m, tp, fp, fn, self.structureFound, totalEnergy