Пример #1
0
def readContigForAbunSplit(folderName, mummerLink, contigFilename,
                           readsetFilename, N1, contigReadGraph):
    """Look up gap content for the resolved graph and write out contigs.

    Steps, in order:

    1. Load ``mapDummyToRealDic.json`` and the ``xResolvedGraph`` graph from
       ``folderName``.
    2. Collect every pair of consecutive entries in the ``nodeIndexList`` of
       the first ``N1`` graph nodes and dump them to ``furtherGapList.json``.
    3. Pass those pairs to ``generateGapContentLookup`` and index the
       returned items by ``"<bk>_<fwd>"``.
    4. Copy ``<contigFilename>_Double.fasta`` to ``tmpWithDummy.fasta`` and
       call ``IORobot.extractGraphToContigs`` to produce ``abunPre.fasta``.
    5. Call ``nonRedundantResolver.removeRedundantWithFile`` on ``abunPre``.

    Args:
        folderName: Directory prefix; it is concatenated directly with file
            names, so it must already end with a path separator.
        mummerLink: Forwarded unchanged to the helper calls.
        contigFilename: Base name of the contig file; ``"_Double.fasta"`` is
            appended to it.
        readsetFilename: Forwarded unchanged to ``generateGapContentLookup``.
        N1: Number of leading entries of ``G.graphNodesList`` to inspect.
        contigReadGraph: Forwarded unchanged to ``generateGapContentLookup``.

    Returns:
        None. All results are written to files under ``folderName``.
    """
    # Local import: the copy below no longer shells out to ``cp``.
    import shutil

    # Context manager so the JSON handle is closed deterministically.
    with open(folderName + "mapDummyToRealDic.json", 'r') as json_data:
        mapDummyToRealDic = json.load(json_data)

    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, "xResolvedGraph")

    gapContentLookUpDic = {}

    furtherGapList = []
    for i in range(N1):
        nodeIndexList = G.graphNodesList[i].nodeIndexList
        # Pair each entry with its successor; lists shorter than two entries
        # yield no pairs.
        for bk, fwd in zip(nodeIndexList, nodeIndexList[1:]):
            key = str(bk) + "_" + str(fwd)

            # The lookup dictionary is still empty at this point, so every
            # pair currently passes this check.
            if key not in gapContentLookUpDic:
                furtherGapList.append([bk, fwd])

    with open(folderName + "furtherGapList.json", 'w') as f:
        json.dump(furtherGapList, f)

    furtherGapContentLookUpList = generateGapContentLookup(
        folderName, mummerLink, furtherGapList, contigReadGraph,
        contigFilename, readsetFilename, mapDummyToRealDic)

    for eachitem in furtherGapContentLookUpList:
        gapContentLookUpDic[str(eachitem[0]) + "_" + str(eachitem[1])] = [
            eachitem[2], eachitem[3], eachitem[4]
        ]
        # Single pre-formatted string: same output as the former two-argument
        # print statement, and also valid under Python 3.
        print("%s %s" % (eachitem[2:4], len(eachitem[4])))

    print("Final step: really hacking a file")
    doubleFastaName = contigFilename + "_Double.fasta"
    # shutil.copyfile copes with spaces in the paths and raises on failure,
    # unlike the previous unchecked ``cp`` shell command.
    shutil.copyfile(folderName + doubleFastaName,
                    folderName + "tmpWithDummy.fasta")
    # NOTE(review): the returned contigs are not used here; the call is kept
    # in case loading has side effects -- confirm before removing.
    IORobot.readContigsFromFile(folderName, doubleFastaName)

    IORobot.extractGraphToContigs(G, folderName, mummerLink, "abunPre.fasta",
                                  "tmpWithDummy.fasta", gapContentLookUpDic,
                                  mapDummyToRealDic)

    nonRedundantResolver.removeRedundantWithFile(folderName, mummerLink,
                                                 "abunPre", "abunMum",
                                                 "abun")
Пример #2
0
def readContigForAbunSplit(folderName, mummerLink, contigFilename, readsetFilename, N1, contigReadGraph):
    """Resolve gap content for ``xResolvedGraph`` and emit the contig files.

    The function loads ``mapDummyToRealDic.json`` and the ``xResolvedGraph``
    graph from ``folderName``, gathers the consecutive ``nodeIndexList``
    pairs of the first ``N1`` graph nodes (also saved to
    ``furtherGapList.json``), obtains their entries from
    ``generateGapContentLookup``, and then drives
    ``IORobot.extractGraphToContigs`` (writing ``abunPre.fasta``) and
    ``nonRedundantResolver.removeRedundantWithFile``.

    Args:
        folderName: Directory prefix, joined to file names by plain string
            concatenation (so it must end with a path separator).
        mummerLink: Passed through to the helper calls.
        contigFilename: Contig base name; ``"_Double.fasta"`` is appended.
        readsetFilename: Passed through to ``generateGapContentLookup``.
        N1: How many leading entries of ``G.graphNodesList`` are scanned.
        contigReadGraph: Passed through to ``generateGapContentLookup``.

    Returns:
        None; output goes to files under ``folderName``.
    """
    # Imported locally because the file copy below replaces a ``cp`` shell call.
    import shutil

    # ``with`` guarantees the JSON file handle is released.
    with open(folderName + "mapDummyToRealDic.json", "r") as json_data:
        mapDummyToRealDic = json.load(json_data)

    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, "xResolvedGraph")

    gapContentLookUpDic = {}

    furtherGapList = []
    for node in G.graphNodesList[:N1] if False else [G.graphNodesList[i] for i in range(N1)]:
        indexList = node.nodeIndexList
        for j in range(len(indexList) - 1):
            bk, fwd = indexList[j], indexList[j + 1]

            # Nothing has been stored in the lookup yet, so this test does not
            # filter any pair at present.
            if str(bk) + "_" + str(fwd) not in gapContentLookUpDic:
                furtherGapList.append([bk, fwd])

    with open(folderName + "furtherGapList.json", "w") as f:
        json.dump(furtherGapList, f)

    furtherGapContentLookUpList = generateGapContentLookup(
        folderName, mummerLink, furtherGapList, contigReadGraph, contigFilename, readsetFilename, mapDummyToRealDic
    )

    for eachitem in furtherGapContentLookUpList:
        gapContentLookUpDic[str(eachitem[0]) + "_" + str(eachitem[1])] = [eachitem[2], eachitem[3], eachitem[4]]
        # One formatted string keeps the output of the old two-argument print
        # statement while remaining valid Python 3 syntax.
        print("%s %s" % (eachitem[2:4], len(eachitem[4])))

    print("Final step: really hacking a file")
    sourceFasta = folderName + contigFilename + "_Double.fasta"
    # A library copy handles paths containing spaces and fails loudly,
    # whereas the earlier ``cp`` command line ignored errors.
    shutil.copyfile(sourceFasta, folderName + "tmpWithDummy.fasta")
    # NOTE(review): return value unused; call retained in case the read has
    # side effects -- confirm before deleting.
    IORobot.readContigsFromFile(folderName, contigFilename + "_Double.fasta")

    IORobot.extractGraphToContigs(
        G, folderName, mummerLink, "abunPre.fasta", "tmpWithDummy.fasta", gapContentLookUpDic, mapDummyToRealDic
    )

    nonRedundantResolver.removeRedundantWithFile(folderName, mummerLink, "abunPre", "abunMum", "abun")
Пример #3
0
def continuousIntegration():
    """Manual scratch harness made of hand-flipped experiment switches.

    Every experiment sits behind a literal ``if False:`` / ``if True:``
    switch that is edited by hand.  As written, only the final switch is
    enabled, so calling this function runs
    ``nonRedundantResolver.removeRedundantWithFile`` on ``"May11TestB/"`` and
    nothing else.  Folder names and the ``"/usr/bin/"`` path argument are
    hard-coded in each experiment.

    Returns:
        None.
    """
    if False:
        # Small synthetic graph fed to BFS_revisit.
        G = graphLib.seqGraph(10)
        for i in range(5):
            G.insertEdge(i, i + 1, 1997)
            G.insertEdge(i, i + 2, 1997)

        resultList = abunGraphLib.BFS_revisit(1, 3, G, 1)

        print("%s %s" % ("resultList", resultList))

    if False:
        folderName = "Apr10Test/"
        mummerPath = "/usr/bin/"
        directPathList = [[1, 486, 217], [1, 8642, 217], [1, 13465, 217]]
        indirectPathList = [[1, 486, 217]]
        contigFile = "improved3_Double.fasta"
        readFile = "phasingSeedName_Double.fasta"

        abunGraphLib.formPathSeq(folderName, mummerPath, directPathList,
                                 indirectPathList, contigFile, readFile)

        if False:
            lenDic = IORobot.obtainLength(folderName, contigFile)
            N1 = len(lenDic)

            print("%s %s" % ("N1", N1))

            G = graphLib.seqGraph(0)
            G.loadFromFile(folderName, "phaseStringGraph1")

            adj = [[] for i in range(N1)]

            for i in range(N1):
                adj[i] = abunGraphLib.findAllReachable(i, N1, G)

            Gnew = abunGraphLib.seqGraphDynamic(N1)

            for i in range(N1):
                for j in adj[i]:
                    Gnew.insertEdge(i, j, 1997)

            Gnew.initAdv()
            Gnew.doubleEdgeReduction()

            contigPaths = abunGraphLib.findAllPathK(1, 217, Gnew, 3)
            contigReadPaths = abunGraphLib.findAllPathK(1, 217, G, 5)

            print("%s %s" % ("contigPaths", contigPaths))
            print("%s %s" % ("contigReadPaths", contigReadPaths))

            Gnew.transitiveReduction()

    if False:
        toDelete = abunGraphLib.decideCut("Apr10Test/", "/usr/bin/")
        print(toDelete)

    if False:
        G = graphLib.seqGraph(0)
        G.loadFromFile("Apr10TestA/", "xResolvedGraph")

        if False:
            for i, v in enumerate(G.graphNodesList):
                if len(v.nodeIndexList) > 0:
                    print("%s %s %s" % (i, v.listOfPrevNodes,
                                        v.listOfNextNodes))

        G.reportEdge()
        lenDic = IORobot.obtainLength("Apr10TestA/", "improved3_Double.fasta")
        mylist = [401, 207, 405, 407, 344]

        # Context manager so the JSON handle is closed after loading.
        with open("Apr10TestA/" + "myCountDic.json", 'r') as json_data:
            myCountDic = json.load(json_data)

        for x in mylist:
            # ``//`` keeps the integer result the string keys rely on,
            # independent of the interpreter's ``/`` semantics.
            half = x // 2
            print("%s %s %s" % (x, lenDic["Contig" + str(half) + "_p"],
                                myCountDic["Segkk" + str(half)]))

    if False:
        folderName = "Apr10TestA/"
        G = graphLib.seqGraph(0)
        G.loadFromFile(folderName, "xResolvedGraph")

        with open(folderName + "mapDummyToRealDic.json", 'r') as json_data:
            mapDummyToRealDic = json.load(json_data)

        lenDic = IORobot.obtainLength(folderName, "improved3_Double.fasta")
        print(len(G.graphNodesList))
        print(len(mapDummyToRealDic))

        print("%s %s %s" % ("fake N1 , real N1 ",
                            len(G.graphNodesList) - len(mapDummyToRealDic),
                            len(lenDic)))

    if False:
        abunSplitter.mainFlow("Apr10TestB/", "/usr/bin/")

    if False:
        nonRedundantResolver.removeEmbedded("Apr10TestD/", "/usr/bin/")

    if False:
        folderName, contigReadGraph = "Apr10TestA/", "phaseStringGraph1"
        G = graphLib.seqGraph(0)
        kthres, edgeThres = 3, 1
        G.loadFromFile(folderName, contigReadGraph)
        lenDic = IORobot.obtainLength(folderName, "improved3_Double.fasta")

        N1 = len(lenDic)

        adj = [[] for i in range(N1)]

        for i in range(N1):
            tmpList = abunGraphLib.findAllReachable(i, N1, G)

            # Keep j only when at least edgeThres paths are returned for
            # the (i, j) pair with the kthres argument.
            for j in tmpList:
                if len(abunGraphLib.findAllPathK(i, j, G, kthres)) >= edgeThres:
                    adj[i].append(j)

        ### Filter adaptor skipped case

        adaptorPair = []

        # Indices are treated as (even, odd) partners: an edge from an index
        # to its partner is dropped from adj and remembered in adaptorPair.
        for i in range(len(adj)):
            if i % 2 == 0:
                if i + 1 in adj[i]:
                    adj[i].remove(i + 1)
                    adaptorPair.append([i, i + 1])
            elif i % 2 == 1:
                if i - 1 in adj[i]:
                    adj[i].remove(i - 1)
                    adaptorPair.append([i, i - 1])

        Gnew = abunGraphLib.seqGraphDynamic(N1)

        for i in range(N1):
            for j in adj[i]:
                Gnew.insertEdge(i, j, 1997)

        for eachpair in adaptorPair:
            u, v = eachpair[0], eachpair[1]
            for x in Gnew.graphNodesList[u].listOfPrevNodes:
                xIndex = x[0]
                Gnew.removeEdge(xIndex, v)
            for y in Gnew.graphNodesList[v].listOfNextNodes:
                yIndex = y[0]
                Gnew.removeEdge(u, yIndex)

        #Gnew.reportEdge()
        # Count (and print) nodes with exactly two predecessors and exactly
        # two successors.
        count2 = 0
        for i in range(len(Gnew.graphNodesList)):
            node = Gnew.graphNodesList[i]
            if len(node.listOfPrevNodes) == 2 and len(node.listOfNextNodes) == 2:
                count2 = count2 + 1
                print(str(i) + "{color:red}")

        print("%s %s" % ("count2, ", count2))

        ### End filter adaptor skipped case

    if True:
        nonRedundantResolver.removeRedundantWithFile(
            "May11TestB/", "/usr/bin/", "abun", "abunDebug", "abunNoEmbed")