Пример #1
0
def formConfirmReadResolve(folderName, inList, outList, G, Grev, N1):
    #print "formConfirmReadResolve"

    resolvedList = []
    confirmingReadList = []
    brLFlankList = []
    brRFlankList = []

    ### Find possible candidate reads
    print "inList , outList formConfirmReadResolve()", inList, outList
    for eachin in inList:
        for eachout in outList:
            pathList = abunGraphLib.findAllPathK(eachin, eachout, G, 3)
            for path in pathList:
                if len(path) == 3 and path[1] >= N1:
                    R = path[1]
                    confirmingReadList.append(R)
                    brLFlankList.append([eachin, R])
                    brRFlankList.append([eachout, R])

    ### Filter simple false cases
    toUseReadDic = {}
    confirmingReadList.sort()
    for key, items in groupby(confirmingReadList):
        toUseReadDic[str(key)] = True

    newbrLFlankList = abunHouseKeeper.getDistinct(brLFlankList)
    newbrLFlankList.sort(key=itemgetter(1))

    for key, items in groupby(newbrLFlankList, itemgetter(1)):
        mylist = list(items)
        if len(mylist) > 1:
            toUseReadDic[str(key)] = False

    newbrRFlankList = abunHouseKeeper.getDistinct(brRFlankList)
    newbrRFlankList.sort(key=itemgetter(1))

    for key, items in groupby(newbrRFlankList, itemgetter(1)):
        mylist = list(items)
        if len(mylist) > 1:
            toUseReadDic[str(key)] = False

    finalSearchReadList = []
    for eachitem in toUseReadDic:
        if toUseReadDic[eachitem] == True:
            finalSearchReadList.append(int(eachitem))

    ### Check paths to confirm all false cases
    for eachR in finalSearchReadList:
        l1 = abunGraphLib.findAllReachable(eachR, N1, G)
        l2 = abunGraphLib.findAllReachable(eachR, N1, Grev)

        l1Distinct = abunHouseKeeper.getDistinct(l1)
        l2Distinct = abunHouseKeeper.getDistinct(l2)

        if len(l1Distinct) == 1 and len(l2Distinct) == 1:
            c1, c2 = l1Distinct[0], l2Distinct[0]
            resolvedList.append([c2, c1])

    return resolvedList
Пример #2
0
def graphSurgery(myCountDic, folderName, contigReadGraph, mummerLink, readsetFilename, contigFilename):

    ### Transitive reduction and remove double pointers
    N1 = len(myCountDic) * 2
    print "N1", N1
    kthres = abunHouseKeeper.abunGlobalSplitParameterRobot.kthres
    edgeThres = abunHouseKeeper.abunGlobalSplitParameterRobot.edgeThres

    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)

    adj = [[] for i in range(N1)]

    for i in range(N1):
        tmpList = abunGraphLib.findAllReachable(i, N1, G)

        for j in tmpList:
            if len(abunGraphLib.findAllPathK(i, j, G, kthres)) >= edgeThres:
                adj[i].append(j)

    ### Filter adaptor skipped case

    adaptorPair = []

    for i in range(len(adj)):
        if i % 2 == 0:
            if i + 1 in adj[i]:
                adj[i].remove(i + 1)
                adaptorPair.append([i, i + 1])
        elif i % 2 == 1:
            if i - 1 in adj[i]:
                adj[i].remove(i - 1)
                adaptorPair.append([i, i - 1])

    Gnew = abunGraphLib.seqGraphDynamic(N1)

    for i in range(N1):
        for j in adj[i]:
            Gnew.insertEdge(i, j, 1997)

    for eachpair in adaptorPair:
        u, v = eachpair[0], eachpair[1]
        for x in Gnew.graphNodesList[u].listOfPrevNodes:
            xIndex = x[0]
            Gnew.removeEdge(xIndex, v)
        for y in Gnew.graphNodesList[v].listOfNextNodes:
            yIndex = y[0]
            Gnew.removeEdge(u, yIndex)

    ### Trying out the new component
    import toCondenseFixer

    Gnew = toCondenseFixer.noGoZoneDefiner(Gnew, folderName)

    Gnew.symGraph()
    ### End filter adaptor skipped case

    if abunHouseKeeper.abunGlobalSplitParameterRobot.runGraphSurgery:

        Gnew.initAdv()
        if abunHouseKeeper.abunGlobalSplitParameterRobot.toRunCondenseRemove:
            Gnew.condenseEdgeRemove(G, folderName, mummerLink, contigFilename)

        if abunHouseKeeper.abunGlobalSplitParameterRobot.toRunDoubltPtr:
            Gnew.doubleEdgeReduction()

        if abunHouseKeeper.abunGlobalSplitParameterRobot.toRunTransitive:
            Gnew.transitiveReduction(
                folderName, mummerLink, contigFilename + "_Double.fasta", readsetFilename + "_Double.fasta", G
            )

        Gnew.condense()
        Gnew.findAdjList()
    else:
        Gnew.initAdv()
        Gnew.condense()
        Gnew.findAdjList()

    return Gnew
Пример #3
0
def singleGapLookUp(eachmatchpair, folderName, N1, mummerLink, contigReadGraph, contigFilename, readsetFilename):

    print eachmatchpair
    leftCtgIndex, rightCtgIndex, leftEnd, rightStart, middleContent = eachmatchpair[0], eachmatchpair[-1], 0, 0, ""

    succReadsList = abunGraphLib.findPathBtwEnds(folderName, leftCtgIndex, rightCtgIndex, contigReadGraph, N1)

    succReadsList = []
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)

    allPaths = abunGraphLib.findAllPathK(leftCtgIndex, rightCtgIndex, G, 5)
    # shuffle(allPaths)

    print "allPaths", allPaths

    possibleList = []
    for p in allPaths:
        noContig = True
        for pp in p[1:-1]:
            if pp < N1:
                noContig = False
        if noContig == True:
            possibleList.append(p)
    print "possibleList", possibleList

    minListLen = 1000
    for p in possibleList:
        if len(p) < minListLen:
            succReadsList = p
            minListLen = len(p)

    if len(succReadsList) > 0:
        succReadsList.pop(0)
        succReadsList.pop(-1)
    else:
        print "interesting item for future study"

    print "succReadsList", succReadsList

    if len(succReadsList) == 0:
        contigName = abunHouseKeeper.parseIDToName(leftCtgIndex, "C", N1)
        leftSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)

        contigName = abunHouseKeeper.parseIDToName(rightCtgIndex, "C", N1)
        rightSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)

        overlap = IORobot.alignWithName(
            leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex)
        )

        print "overlap contig : ", overlap

        leftEnd = len(leftSeg) - overlap[0]
        middleContent = ""

    else:

        contigName = abunHouseKeeper.parseIDToName(leftCtgIndex, "C", N1)
        print contigName
        leftSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)

        readName = abunHouseKeeper.parseIDToName(succReadsList[0], "R", N1)
        print readName
        rightSeg = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName)

        overlap = IORobot.alignWithName(
            leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex)
        )

        print "overlap start read : ", overlap

        leftEnd = len(leftSeg) - overlap[0]

        middleContent = ""

        for i in range(len(succReadsList) - 1):
            readName = abunHouseKeeper.parseIDToName(succReadsList[i], "R", N1)
            leftSeg = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName)

            readName = abunHouseKeeper.parseIDToName(succReadsList[i + 1], "R", N1)
            rightSeg = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName)

            overlap = IORobot.alignWithName(
                leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex)
            )
            print "overlap middle read : ", overlap
            middleContent = middleContent + leftSeg[0 : len(leftSeg) - overlap[0]]

        readName = abunHouseKeeper.parseIDToName(succReadsList[-1], "R", N1)
        leftSeg = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName)

        contigName = abunHouseKeeper.parseIDToName(rightCtgIndex, "C", N1)
        rightSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)

        overlap = IORobot.alignWithName(
            leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex)
        )
        print "overlap end read : ", overlap

        middleContent = middleContent + leftSeg[0 : len(leftSeg) - overlap[0]]

    return [leftCtgIndex, rightCtgIndex, leftEnd, rightStart, middleContent]
Пример #4
0
def formConfirmReadResolve(folderName, inList, outList, G, Grev, N1):
    # print "formConfirmReadResolve"

    resolvedList = []
    confirmingReadList = []
    brLFlankList = []
    brRFlankList = []

    ### Find possible candidate reads
    print "inList , outList formConfirmReadResolve()", inList, outList
    for eachin in inList:
        for eachout in outList:
            pathList = abunGraphLib.findAllPathK(eachin, eachout, G, 3)

            for path in pathList:

                if len(path) == 3 and path[1] >= N1:
                    R = path[1]
                    confirmingReadList.append(R)
                    brLFlankList.append([eachin, R])
                    brRFlankList.append([eachout, R])

    ### Filter simple false cases
    toUseReadDic = {}
    confirmingReadList.sort()
    for key, items in groupby(confirmingReadList):
        toUseReadDic[str(key)] = True

    newbrLFlankList = abunHouseKeeper.getDistinct(brLFlankList)
    newbrLFlankList.sort(key=itemgetter(1))

    for key, items in groupby(newbrLFlankList, itemgetter(1)):
        mylist = list(items)
        if len(mylist) > 1:
            toUseReadDic[str(key)] = False

    newbrRFlankList = abunHouseKeeper.getDistinct(brRFlankList)
    newbrRFlankList.sort(key=itemgetter(1))

    for key, items in groupby(newbrRFlankList, itemgetter(1)):
        mylist = list(items)
        if len(mylist) > 1:
            toUseReadDic[str(key)] = False

    finalSearchReadList = []
    for eachitem in toUseReadDic:
        if toUseReadDic[eachitem] == True:
            finalSearchReadList.append(int(eachitem))

    ### Check paths to confirm all false cases
    for eachR in finalSearchReadList:
        l1 = abunGraphLib.findAllReachable(eachR, N1, G)
        l2 = abunGraphLib.findAllReachable(eachR, N1, Grev)

        l1Distinct = abunHouseKeeper.getDistinct(l1)
        l2Distinct = abunHouseKeeper.getDistinct(l2)

        if len(l1Distinct) == 1 and len(l2Distinct) == 1:
            c1, c2 = l1Distinct[0], l2Distinct[0]
            resolvedList.append([c2, c1])

    return resolvedList
Пример #5
0
def continuousIntegration():
	if False:
		G = graphLib.seqGraph(10)
		for i in range(5):
			G.insertEdge(i,i+1,1997)
			G.insertEdge(i,i+2, 1997)

		resultList = abunGraphLib.BFS_revisit(1,3,G,1)

		print "resultList", resultList 

	if False : 

		folderName, mummerPath, directPathList, indirectPathList, contigFile, readFile = \
			"Apr10Test/", "/usr/bin/", [[1, 486, 217], [1, 8642, 217], [1, 13465, 217]], [[1, 486, 217]], "improved3_Double.fasta", "phasingSeedName_Double.fasta"

		abunGraphLib.formPathSeq(folderName, mummerPath, directPathList, indirectPathList, contigFile, readFile)
    
		if False:
			lenDic = IORobot.obtainLength(folderName , contigFile)
			N1 = len(lenDic)

			print "N1", N1

			G = graphLib.seqGraph(0)
			G.loadFromFile(folderName, "phaseStringGraph1")

			adj = [[] for i in range(N1)]

			for i in range(N1): 
			    adj[i] = abunGraphLib.findAllReachable(i, N1, G)

			Gnew = abunGraphLib.seqGraphDynamic(N1)

			for i in range(N1):
			    for j in adj[i]:
			        Gnew.insertEdge(i,j,1997)


			Gnew.initAdv()    
			Gnew.doubleEdgeReduction()

			contigPaths = abunGraphLib.findAllPathK(1, 217, Gnew, 3)
			contigReadPaths = abunGraphLib.findAllPathK(1, 217, G, 5)

			print "contigPaths", contigPaths
			print "contigReadPaths", contigReadPaths

			Gnew.transitiveReduction()

	if False:
		toDelete = abunGraphLib.decideCut("Apr10Test/", "/usr/bin/")
		print toDelete

	if False:
		G = graphLib.seqGraph(0)
		G.loadFromFile("Apr10TestA/", "xResolvedGraph")

		if False:
			for i in range(len(G.graphNodesList)):

				v = G.graphNodesList[i]

				if len(v.nodeIndexList) > 0:
					print i , v.listOfPrevNodes , v.listOfNextNodes

		G.reportEdge()
		lenDic = IORobot.obtainLength("Apr10TestA/", "improved3_Double.fasta")
		mylist = [401, 207, 405, 407, 344]

		json_data = open("Apr10TestA/" + "myCountDic.json", 'r')
		myCountDic = json.load(json_data)

		for x in mylist:
			print x, lenDic["Contig"+str(x/2)+"_p"], myCountDic["Segkk"+str(x/2)]


	if False:
		folderName = "Apr10TestA/"
		G = graphLib.seqGraph(0)
		G.loadFromFile(folderName , "xResolvedGraph")

		json_data = open(folderName + "mapDummyToRealDic.json", 'r')
		mapDummyToRealDic = json.load(json_data)

		lenDic = IORobot.obtainLength(folderName, "improved3_Double.fasta")
		print len(G.graphNodesList)
		print len(mapDummyToRealDic)
		
		print "fake N1 , real N1 ", len(G.graphNodesList) - len(mapDummyToRealDic), len(lenDic)


	if False:
		abunSplitter.mainFlow("Apr10TestB/", "/usr/bin/")

	if False: 
		nonRedundantResolver.removeEmbedded("Apr10TestD/", "/usr/bin/")

	if False:
		folderName, contigReadGraph = "Apr10TestA/", "phaseStringGraph1"
		G = graphLib.seqGraph(0)
		kthres, edgeThres = 3, 1
		G.loadFromFile(folderName, contigReadGraph)
		lenDic = IORobot.obtainLength(folderName , "improved3_Double.fasta")

		N1 = len(lenDic)

		adj = [[] for i in range(N1)]

		for i in range(N1): 
		    tmpList = abunGraphLib.findAllReachable(i, N1, G)
		    
		    for j in tmpList:
		        if len(abunGraphLib.findAllPathK(i,j,G,kthres)) >= edgeThres:
		            adj[i].append(j) 

		    #print i, adj[i]

	    ### Filter adaptor skipped case 

		adaptorPair = []

		for i in range(len(adj)):
		    if  i % 2 == 0:
		        if i + 1 in adj[i]:
		            adj[i].remove(i+1)
		            adaptorPair.append([i, i+1])
		    elif i % 2 ==1: 
		        if i-1 in adj[i] :
		            adj[i].remove(i-1)
		            adaptorPair.append([i, i-1])

		Gnew = abunGraphLib.seqGraphDynamic(N1)

		for i in range(N1):
		    for j in adj[i]:
		        Gnew.insertEdge(i,j,1997)

		for eachpair in adaptorPair:
		    u, v = eachpair[0], eachpair[1]
		    for x in Gnew.graphNodesList[u].listOfPrevNodes:
		        xIndex = x[0]
		        Gnew.removeEdge(xIndex, v)
		    for y in Gnew.graphNodesList[v].listOfNextNodes:
		        yIndex = y[0]
		        Gnew.removeEdge(u, yIndex)


        #Gnew.reportEdge()
		count2 = 0
		for i in range(len(Gnew.graphNodesList)):
			if  len(Gnew.graphNodesList[i].listOfPrevNodes) == 2 and  len(Gnew.graphNodesList[i].listOfNextNodes) == 2:
				count2 = count2 + 1
				print str(i)+"{color:red}"

		print "count2, ", count2

		### End filter adaptor skipped case 
	if True:
		nonRedundantResolver.removeRedundantWithFile("May11TestB/" , "/usr/bin/", "abun", "abunDebug", "abunNoEmbed")
Пример #6
0
def graphSurgery(myCountDic, folderName, contigReadGraph, mummerLink,
                 readsetFilename, contigFilename):

    ### Transitive reduction and remove double pointers
    N1 = len(myCountDic) * 2
    print "N1", N1
    kthres = abunHouseKeeper.abunGlobalSplitParameterRobot.kthres
    edgeThres = abunHouseKeeper.abunGlobalSplitParameterRobot.edgeThres

    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)

    adj = [[] for i in range(N1)]

    for i in range(N1):
        tmpList = abunGraphLib.findAllReachable(i, N1, G)

        for j in tmpList:
            if len(abunGraphLib.findAllPathK(i, j, G, kthres)) >= edgeThres:
                adj[i].append(j)

    ### Filter adaptor skipped case

    adaptorPair = []

    for i in range(len(adj)):
        if i % 2 == 0:
            if i + 1 in adj[i]:
                adj[i].remove(i + 1)
                adaptorPair.append([i, i + 1])
        elif i % 2 == 1:
            if i - 1 in adj[i]:
                adj[i].remove(i - 1)
                adaptorPair.append([i, i - 1])

    Gnew = abunGraphLib.seqGraphDynamic(N1)

    for i in range(N1):
        for j in adj[i]:
            Gnew.insertEdge(i, j, 1997)

    for eachpair in adaptorPair:
        u, v = eachpair[0], eachpair[1]
        for x in Gnew.graphNodesList[u].listOfPrevNodes:
            xIndex = x[0]
            Gnew.removeEdge(xIndex, v)
        for y in Gnew.graphNodesList[v].listOfNextNodes:
            yIndex = y[0]
            Gnew.removeEdge(u, yIndex)

    Gnew.reportEdge()
    ### Trying out the new component
    import toCondenseFixer
    Gnew = toCondenseFixer.noGoZoneDefiner(Gnew, folderName)

    Gnew.symGraph()
    #Gnew.reportEdge()
    ### End filter adaptor skipped case

    if abunHouseKeeper.abunGlobalSplitParameterRobot.runGraphSurgery:

        Gnew.initAdv()
        if abunHouseKeeper.abunGlobalSplitParameterRobot.toRunCondenseRemove:
            Gnew.condenseEdgeRemove(G, folderName, mummerLink, contigFilename)

        if abunHouseKeeper.abunGlobalSplitParameterRobot.toRunDoubltPtr:
            Gnew.doubleEdgeReduction()

        if abunHouseKeeper.abunGlobalSplitParameterRobot.toRunTransitive:
            Gnew.transitiveReduction(folderName, mummerLink,
                                     contigFilename + "_Double.fasta",
                                     readsetFilename + "_Double.fasta", G)

        Gnew.condense()
        Gnew.findAdjList()
    else:
        Gnew.initAdv()
        Gnew.condense()
        Gnew.findAdjList()

    return Gnew