Exemplo n.º 1
0
 def __or__(self, node, distance=1):
     """Support ``self | node`` by uniting the two flattened operands.

     Both operands are flattened with the same ``distance`` and the two
     results are handed to ``cluster.union``; its return value is returned
     unchanged.  The ``|`` operator itself only supplies ``node``, so
     ``distance`` keeps its default unless the method is called directly.
     """
     unite = cluster.union
     own_flat = self.flatten(distance)
     other_flat = node.flatten(distance)
     return unite(own_flat, other_flat)
Exemplo n.º 2
0
 def __or__(self, node, distance=1):
     # Binary ``|``: flatten self first, then node, both with the same
     # ``distance``, and return whatever ``cluster.union`` makes of the pair.
     merge = cluster.union
     flattened = [operand.flatten(distance) for operand in (self, node)]
     return merge(*flattened)
Exemplo n.º 3
0
def clearResidualMain(f1, G1,parameterRobot):
    """Collapse the "small" nodes hanging off "big" nodes and rebuild the graph.

    Outline of what the body does:

    1. Breadth-first walk over ``listOfNextNodes`` starting from the nodes
       of ``G1`` that have no predecessors (or, if there are none, from the
       first node that has a successor).  Every reached node is classified
       as "big" when ``len(nodeIndexList) > parameterRobot.brachingDepth``
       and as "small" otherwise.
    2. One ``cluster.clusterElem`` is created per group id in
       ``0 .. int(max first field of f1)``.
    3. For every big node the chain of non-big nodes reachable from it is
       dissolved: its indices are bucketed by level (distance from the big
       node), one ``graphForm.condensedNode`` is created per level, the
       levels are chained together and re-attached to the surrounding
       nodes, and all indices on one level are merged with
       ``cluster.union``.
    4. The nodes reachable from ``bigList[0]`` are collected, passed
       repeatedly through ``graphForm.condenseGraph``, stripped of
       dead ends, and post-processed by several ``graphForm`` passes.
    5. Each record of ``f1`` is relabelled with the id of its cluster
       representative.

    Parameters:
        f1 -- sequence of records.  Element ``[0]`` is used as the old group
            id and as an index into the cluster list; ``[1]`` and ``[2]``
            are copied through as read number and offset.
        G1 -- indexable collection of graph nodes.  The code reads and
            writes ``listOfPrevNodes``, ``listOfNextNodes``,
            ``nodeIndexList``, ``nodeIndex`` and ``visited`` on them; the
            nodes are modified in place.
        parameterRobot -- object providing ``brachingDepth`` (attribute name
            as spelled in the code); also forwarded to
            ``graphForm.flowBalancingTransform``.

    Returns:
        ``(f2, G2)`` where ``f2`` is the sorted list of
        ``[newGpid, readNum, offset, fused, oldGpid]`` rows and ``G2`` is the
        graph returned by ``graphForm.flowBalancingTransform``.

    Progress and debugging information is printed to stdout throughout.

    NOTE(review): ``f1[-1]`` and ``bigList[0]`` are indexed unconditionally,
    so an empty ``f1`` or a run in which no node is classified as big raises
    IndexError.
    """

    f2, G2 = [], []
    branchLimit = parameterRobot.brachingDepth    
    queue = []
    
    # Classify Big or Small sequences
    print "Classify Big or Small sequences"
    # Reset the traversal flag on every node and collect the nodes that have
    # no predecessor; they are the BFS roots.
    startList = []
    for v in G1:
        v.visited = False
        if len(v.listOfPrevNodes) == 0:
            startList.append(v)
      
    if len(startList) != 0 :
        # queue is the same list object as startList, so popping from the
        # queue below also empties startList.
        queue = startList
    else: 
        # No root available: fall back to the first node that has at least
        # one successor.
        # NOTE(review): raises IndexError if no node in G1 has a successor.
        runningindex = 0

        while len(G1[runningindex].listOfNextNodes) == 0:
            runningindex = runningindex+1
        queue  = [G1[runningindex]]
        #print len(G1[runningindex].listOfNextNodes)
        
    bigList = []
    smallList = []
    for eachelem in queue:
        eachelem.visited = True
        
    # BFS: nodes are marked visited when enqueued, so each reachable node is
    # classified exactly once.  Nodes not reachable from the roots end up in
    # neither list.
    while (len(queue) > 0):

        currentNode = queue.pop(0)
        print "currentNode.nodeIndex, len(currentNode.listOfNextNodes), len(currentNode.nodeIndexList)  "    ,currentNode.nodeIndex, len(currentNode.listOfNextNodes), len(currentNode.nodeIndexList)
        
        for eachnextnode in currentNode.listOfNextNodes:
            if eachnextnode.visited == False:
                queue.append(eachnextnode)
                eachnextnode.visited = True
        
        print len(currentNode.nodeIndexList) 
        # "Big" means strictly more indices than the branching depth.
        if len(currentNode.nodeIndexList) > branchLimit:
            bigList.append(currentNode)
        else:
            # smallList is filled here but not read again in this function.
            smallList.append(currentNode)
        
    # Clear Branches originated from big branches
    print "Clear Branches originated from big branches"
    #countGroup = 0
    # Reset the flags again; they are reused by the per-big-node walks below.
    for v in G1:
        v.visited = False
    
    # After sorting, the last record carries the largest first field; the
    # number of clusters is that value plus one.
    f1 = sorted(f1)
    countGroup = int(f1[-1][0] ) +1
    #countGroup = countGroup + len(v.nodeIndexList)
    #print "countGroup", countGroup
    
    # One cluster element per group id; clusterList[i] belongs to id i.
    clusterList = []
    for index in range(countGroup):
        clusterList.append(cluster.clusterElem(index))
    
    print "len(bigList)", len(bigList)
    
    # Remove small dead-end predecessors of big nodes: a predecessor with
    # fewer than branchLimit indices and no predecessors of its own is
    # emptied and unlinked from the big node.
    # NOTE(review): the test here is "< branchLimit" while the classification
    # above is "> branchLimit"; a node with exactly branchLimit indices is
    # small but is not removed here - confirm this is intended.
    for v in bigList:
        eachitemindex = 0
        while (eachitemindex < len( v.listOfPrevNodes)):
            if len(v.listOfPrevNodes[eachitemindex].nodeIndexList) < branchLimit and len(v.listOfPrevNodes[eachitemindex].listOfPrevNodes) == 0:
                print "eachitemindex", eachitemindex
                v.listOfPrevNodes[eachitemindex].listOfNextNodes = []
                v.listOfPrevNodes[eachitemindex].nodeIndexList = []
                # The list shrinks, so the index is not advanced in this
                # branch.
                v.listOfPrevNodes.remove(v.listOfPrevNodes[eachitemindex])
                
                
            else:
                eachitemindex = eachitemindex + 1
    
    for v in bigList:
        
        
        # Per-big-node working lists:
        #   levelList           [level, index] for every index of the small
        #                       nodes walked below
        #   inAtLevelList       [level, node] nodes that feed into a level
        #   outAtLevelList      [level, node] nodes that a level leads to
        #   smallNodesListnext  [level, first index of node, node] for
        #                       non-big successors seen during the walk
        #   smallNodesListprev  [level, last index of node, node] for
        #                       non-big predecessors seen during the walk
        levelList = []
        inAtLevelList = []
        outAtLevelList = []
        
        smallNodesListnext = []
        smallNodesListprev = []
        # Collect Associated clusters and put them into levels 
        queue = []
        
        print "v.nodeIndex, len(v.listOfNextNodes)", v.nodeIndex, len(v.listOfNextNodes)
        
        # Detach every non-big successor from v.  Successors that have
        # successors of their own seed the walk at level 0; successors
        # without any are simply cut off.
        runningindex = 0
        while (runningindex < len(v.listOfNextNodes)):
            eachnode = v.listOfNextNodes[runningindex]
            #print " eachnode.nodeIndex", eachnode.nodeIndex
            if not eachnode in bigList :
                # Cut deadend
                if  len(eachnode.listOfNextNodes) > 0:
                    queue.append([eachnode,0])
                v.listOfNextNodes.remove(eachnode)
                eachnode.listOfPrevNodes.remove(v)
            else:
                runningindex  = runningindex +1 

        # v was unlinked from the seeds above, so the walk cannot find it as
        # a predecessor; register it explicitly as the entry at level 0.
        inAtLevelList.append([0, v])
        print "len(v.listOfNextNodes),len(v.listOfPrevNodes)", len( v.listOfNextNodes), len(v.listOfPrevNodes)
        #print "len(queue)", len(queue)
        
        # Walk the non-big nodes.  cumLvl is the level of the node's first
        # index; a node with k indices occupies levels cumLvl .. cumLvl+k-1.
        # Nodes are marked visited when dequeued (not when enqueued), so a
        # node can be queued more than once; by its second visit its edges
        # and nodeIndexList have already been cleared below.
        while ( len(queue)  > 0):
            currentNode, cumLvl = queue.pop(0)
            currentNode.visited = True
            
            # Big predecessors enter at the node's first level; non-big
            # predecessors are remembered for the filtering step below.
            for eachprevnode in currentNode.listOfPrevNodes:
                if eachprevnode in bigList and not [cumLvl, eachprevnode] in inAtLevelList :
                    inAtLevelList.append([cumLvl, eachprevnode])
                if not eachprevnode in bigList :
                    smallNodesListprev.append([ cumLvl, eachprevnode.nodeIndexList[-1],eachprevnode])
                #if eachprevnode.nodeIndex == 3161:
                #    inAtLevelList.append([cumLvl, eachprevnode])
                #    print "???", v.nodeIndex

            # Big successors leave from the node's last level.
            # NOTE(review): the membership test looks for [cumLvl, node] but
            # the entry appended uses cumLvl + len(nodeIndexList) - 1, so the
            # test only matches an existing entry when the node has exactly
            # one index - confirm whether duplicates are intended.
            for eachnextnode in currentNode.listOfNextNodes:
                if eachnextnode in bigList and not [cumLvl, eachnextnode] in outAtLevelList:
                    outAtLevelList.append([cumLvl+ len(currentNode.nodeIndexList) -1, eachnextnode])
                    
                if not eachnextnode in bigList :
                    smallNodesListnext.append([ cumLvl+len(currentNode.nodeIndexList) -1, eachnextnode.nodeIndexList[0], eachnextnode])
                
            
            # Spread the node's indices over consecutive levels.
            for eachindex, runningindex in zip(currentNode.nodeIndexList, range(len(currentNode.nodeIndexList))):
                levelList.append([runningindex + cumLvl, eachindex])
                
            # Continue the walk through unvisited non-big successors; they
            # start on the level right after this node's last one.
            for eachnode in currentNode.listOfNextNodes :
                
                if ( not eachnode in bigList) and ( eachnode.visited == False):
                    queue.append([eachnode, cumLvl+ len(currentNode.nodeIndexList)])
                
                
            # remove edges
            # Outgoing edges: unlink both directions.  The index only moves
            # on when the successor does not list currentNode as predecessor.
            runningindex = 0
            while (runningindex < len(currentNode.listOfNextNodes)):
                eachnode = currentNode.listOfNextNodes[runningindex]
                
                if currentNode in eachnode.listOfPrevNodes:
                    eachnode.listOfPrevNodes.remove(currentNode)
                    currentNode.listOfNextNodes.remove(eachnode)
                else:
                    runningindex = runningindex + 1
            
            # Incoming edges: same procedure in the other direction.
            runningindex = 0
            while (runningindex < len(currentNode.listOfPrevNodes)):    
                eachnode = currentNode.listOfPrevNodes[runningindex]   
                if currentNode in eachnode.listOfNextNodes:
                    eachnode.listOfNextNodes.remove(currentNode)
                    currentNode.listOfPrevNodes.remove(eachnode)
                else:
                    runningindex = runningindex  +1 
                    
            # The node is now dissolved: its indices live on in levelList.
            currentNode.nodeIndexList = []
            
            
        
        # Find backward edges : 
        # internalPairsList : Formats: { (inLvl, outLvl)  }  e.g. {(0,1), (1,2), (2,3), (3,4)... }
        # smallNodesList.append([ cumLvl+len(currentNode.nodeIndexList) -1, eachnextnode.nodeIndex])]
        
        #Filtering of small node missing
        # A non-big successor whose recorded first index does not occur in
        # levelList was not absorbed into the levels; treat it as an outgoing
        # connection at the recorded level and drop it from the list.
        runningindex = 0
        while (runningindex < len(smallNodesListnext) ):
            
            eachitem = smallNodesListnext[runningindex]
            nodeIndex = eachitem[1]
            
            found = False
            for dummy in levelList:
                if nodeIndex == dummy[1]:
                    found = True
            
            if not found:
                outAtLevelList.append([eachitem[0], eachitem[2]])
                smallNodesListnext.pop(runningindex)
            else:
                runningindex = runningindex +1 


        # Same filter for non-big predecessors, keyed on their recorded last
        # index; the ones not absorbed become incoming connections.
        runningindex = 0
        while (runningindex < len(smallNodesListprev) ):
            eachitem = smallNodesListprev[runningindex]
            #print eachitem[2]
            nodeIndex = eachitem[1]
            found = False
            for dummy in levelList:
                if nodeIndex == dummy[1]:
                    found = True
            
            if not found:
                inAtLevelList.append([eachitem[0], eachitem[2]])
                smallNodesListprev.pop(runningindex)
            else:
                runningindex = runningindex +1                 
        #End filtering
        
        ### Special treatment for indel : no backedge added for small nodes
        # With the smallNodeEdges calls commented out, internalPairsList
        # stays empty and the back-edge loop further down does nothing.
        internalPairsList = []
        #internalPairsList = smallNodeEdges(smallNodesListnext,levelList, "next" ) 
        #internalPairsList = filterSameItem(internalPairsList)
        #print "internalPairsList" , internalPairsList
        
        
        
        #internalPairsList = internalPairsList + smallNodeEdges(smallNodesListprev,levelList, "prev" ) 
        #print "internalPairsList", internalPairsList


        ### Special treatment for indel : no backedge added for small nodes End
        # End Find backward edges 
                

        # Merge Nodes
        levelList= sorted(levelList)
        print "levelList",levelList
        print "inAtLevelList",inAtLevelList
        print "outAtLevelList",outAtLevelList
        inAtLevelList = sorted(inAtLevelList)
        outAtLevelList = sorted(outAtLevelList)
        
        ### list hacks :
        # Clamp every incoming level that lies beyond the last outgoing level
        # down to that level (finalOut).
        # NOTE(review): when there is no outgoing entry finalOut is -1, so
        # the clamped level is -1, which as a list index below selects the
        # LAST level of toMergeList - confirm this is the intended behaviour.
        if len(outAtLevelList) > 0 :
            finalOut = outAtLevelList[-1][0]
        else: 
            finalOut = -1
            
        finalinAtLevellist = []
        for eachinlvl in inAtLevelList:
            if eachinlvl[0] > finalOut:
                finalinAtLevellist.append([finalOut, eachinlvl[1]])
            else:
                finalinAtLevellist.append(eachinlvl)
                
        inAtLevelList = sorted(finalinAtLevellist)        
        # End list hacks 
        
        if len(levelList) > 0:
            # levelList is sorted, so its last entry holds the highest level.
            # toMergeList[level] = [indices, incoming nodes, outgoing nodes]
            numberOfLevels = levelList[-1][0]
            toMergeList = [[[],[],[]] for i in range(numberOfLevels+1)]
            
            for item in levelList:
                index = item[0]
                content = item[1]
                #print index
                toMergeList[index][0].append(content)
                
            for item in inAtLevelList:
                index = item[0]
                content = item[1]
                toMergeList[index][1].append(content)
                
            for item in outAtLevelList:
                index = item[0]
                content = item[1]
                toMergeList[index][2].append(content)        
                
                
            print "toMergeList",toMergeList
            
            # init nodes array
            # One condensed node per level, named after the first index on
            # that level.  Note that this rebinds the name v, which until
            # here referred to the current big node.
            # NOTE(review): toMergeList[i][0][0] raises IndexError if some
            # level between 0 and numberOfLevels received no index.
            vArray = []
            for i in range(len(toMergeList)):
                idOfNode = toMergeList[i][0][0]
                v = []
                v = graphForm.condensedNode(idOfNode)
                v.updateNodeList()
                vArray.append(v)
                
            # Wire the level nodes: each gets its recorded incoming/outgoing
            # nodes plus the neighbouring level nodes, so the levels form a
            # chain.  Edges are added in both directions unless present.
            for i in range(len(toMergeList)):
                idOfNode = toMergeList[i][0][0]
                mylistOfcluster = toMergeList[i][0]
                mylistOfPrevNodes = toMergeList[i][1]
                mylistOfNextNodes = toMergeList[i][2]
                
                if i > 0:
                    mylistOfPrevNodes = mylistOfPrevNodes + [vArray[i-1]]
                    
                if i < len(toMergeList) -1:  
                    mylistOfNextNodes = mylistOfNextNodes + [vArray[i+1]]
                
                v = vArray[i]
                
                for eachnode in mylistOfPrevNodes:
                    if not eachnode in v.listOfPrevNodes:
                        v.addPrevNodes(eachnode)
                    if not v in eachnode.listOfNextNodes:
                        eachnode.addNextNodes(v)
                    
                    
                for eachnode in mylistOfNextNodes:
                    if not eachnode in v.listOfNextNodes:
                        v.addNextNodes(eachnode)
                    if not v in eachnode.listOfPrevNodes:
                        eachnode.addPrevNodes(v)
                
                
                #print mylistOfcluster
                # Merge all indices on this level into one cluster.
                for eachindex in mylistOfcluster:
                    cluster.union(clusterList[idOfNode], clusterList[eachindex])
            
            
            print "internalPairsList", internalPairsList
            # Back edges between levels; a no-op while internalPairsList is
            # left empty above.
            for eachitem in internalPairsList:
                tmpprevnode = eachitem[0]
                tmpnextnode = eachitem[1]

                if not vArray[tmpprevnode] in vArray[tmpnextnode].listOfPrevNodes:
                    vArray[tmpnextnode].listOfPrevNodes.append(vArray[tmpprevnode])
                if not vArray[tmpnextnode] in vArray[tmpprevnode].listOfNextNodes:
                    vArray[tmpprevnode].listOfNextNodes.append(vArray[tmpnextnode])
                        
            for i in range(len(toMergeList)):
                print "vArray[i].nodeIndex, len(vArray[i].listOfPrevNodes), len(vArray[i].listOfNextNodes)",vArray[i].nodeIndex, len(vArray[i].listOfPrevNodes), len(vArray[i].listOfNextNodes)
            
        
    # Formatting Return 
    print "Formatting Return "
    seqGraphNodes = []
    print len(bigList)
    #for eachitem in bigList:
    #    print len(eachitem.nodeIndexList), len(eachitem.listOfPrevNodes), len(eachitem.listOfNextNodes), eachitem.listOfPrevNodes[0].nodeIndex
    # Collect the non-empty nodes reachable from the first big node by
    # following listOfNextNodes.
    # NOTE(review): nodes are marked visited when dequeued, so a node with
    # several predecessors can be enqueued - and appended to seqGraphNodes -
    # more than once; confirm graphForm.condenseGraph tolerates duplicates.
    # Also assumes freshly created condensed nodes start with
    # visited == False - TODO confirm in graphForm.condensedNode.
    queue = [bigList[0]]
    
    while len(queue) > 0:
        currentNode = queue.pop(0)
        currentNode.visited = True
        #print "currentNode.nodeIndex ", currentNode.nodeIndex
        if len(currentNode.nodeIndexList) > 0:
            seqGraphNodes.append(currentNode)
        
        for eachnode in currentNode.listOfNextNodes:
            if eachnode.visited == False:
                queue.append(eachnode)

     
    #G2 =            seqGraphNodes
    # Condense once, then repeat the condensation as many times as the first
    # result has nodes (the count is fixed before the loop starts).
    G2,startList2 = graphForm.condenseGraph(seqGraphNodes)  
    sizeOfGraph = len(G2) 
    for index in range(sizeOfGraph):
        G2,startList2 = graphForm.condenseGraph(G2)  

    # Hacking the deadends :- ??? 
    # Nodes without predecessors are unlinked from their successors and
    # emptied; nodes without successors are unlinked from their predecessors
    # and emptied.  The second test is a plain "if", so a node emptied by the
    # first branch also goes through the second one.
    for eachnode in G2:
        if len(eachnode.listOfPrevNodes) == 0 :
            print "no prev"
            runningindex =0 
            # Only the successor's predecessor list is edited here, so the
            # index advances once eachnode is no longer found in it.
            while (runningindex < len(eachnode.listOfNextNodes)):
                if eachnode in eachnode.listOfNextNodes[runningindex].listOfPrevNodes:
                    eachnode.listOfNextNodes[runningindex].listOfPrevNodes.remove(eachnode)
                else:
                    runningindex = runningindex+1
            eachnode.listOfNextNodes = []
            eachnode.nodeIndexList = []
            
        if len(eachnode.listOfNextNodes) == 0:
            print "nonext", eachnode.nodeIndex, len(eachnode.nodeIndexList)
            runningindex =0 
            while (runningindex < len(eachnode.listOfPrevNodes)):
                if eachnode in eachnode.listOfPrevNodes[runningindex].listOfNextNodes:
                    eachnode.listOfPrevNodes[runningindex].listOfNextNodes.remove(eachnode)
                    print "eachnode.nodIndex, len(eachnode.nodeIndexList)", eachnode.nodeIndex, len(eachnode.nodeIndexList)
                    print "eachnode.listOfPrevNodes[runningindex].nodeIndex",eachnode.listOfPrevNodes[runningindex].nodeIndex
                else:
                    runningindex = runningindex +1 
            eachnode.listOfPrevNodes = []
            eachnode.nodeIndexList = []    
    
      
    #print   "len(G2)", len(G2)

    # Drop every node that was emptied above (or was empty already).
    runningindex =0 
    while (runningindex < len(G2)):
        if len(G2[runningindex].nodeIndexList) == 0:
            G2.pop(runningindex)
        else:
            runningindex = runningindex + 1 
    
    print "len(G2)", len(G2)
     
    #G2, startList2 = graphForm.newCondensingStep(G2)
    
    # Five rounds of graph clean-up; each graphForm pass is followed by a
    # condensing step.  What the individual passes do is defined in
    # graphForm and not visible here.
    for trial in range(5):
        G2, startList = graphForm.transitiveReduction(G2)
        G2, startList  = graphForm.newCondensingStep(G2)   
 
    
        G2, startList = graphForm.removeLoopsAndCycles(G2)
        G2, startList  = graphForm.newCondensingStep(G2) 
   
    
        G2, startList = graphForm.combineSelfReferal(G2)
        G2, startList  = graphForm.newCondensingStep(G2) 
        
        G2, startList = graphForm.endRemoval(G2)
        G2, startList  = graphForm.newCondensingStep(G2)   

    
    G2, startList = graphForm.flowBalancingTransform(G2, parameterRobot, f1)
    ### Finish hack
    # OutputFormat :  Gp id , read #, offset #, fusedOrNot, prevGroup id 
    # Relabel every input record with the id of its cluster representative.
    # NOTE(review): oldGpid is used directly as a list index, whereas
    # countGroup above was computed through int(...) - presumably the ids
    # are already integers; verify against the caller.
    f2 = []
    for eachitem in f1:
        oldGpid = eachitem[0]

        newGp = cluster.find(clusterList[oldGpid])
        newGpid = newGp.id

        readNum = eachitem[1]
        offset = eachitem[2]
        
        # A record counts as fused when the family list of its
        # representative holds more than one element.
        if len(cluster.familyList(clusterList[newGpid])) == 1:
            fused = False
        else:
            fused = True
            
        rowRecord = [newGpid, readNum, offset, fused, oldGpid]
        
        f2.append(rowRecord)
    
    f2 = sorted(f2) 

    return f2, G2