def readContigForAbunSplit(folderName, mummerLink, contigFilename, readsetFilename, N1, contigReadGraph):
    """Fill the gaps along the resolved graph and extract the resulting contigs.

    Steps, in order:
      1. Load ``mapDummyToRealDic.json`` and the graph saved as
         "xResolvedGraph" from ``folderName``.
      2. Collect every adjacent ``[bk, fwd]`` pair found in the
         ``nodeIndexList`` of the first ``N1`` graph nodes and save the
         pairs to ``furtherGapList.json``.
      3. Pass the pairs to ``generateGapContentLookup`` and index its results
         by the key ``"<bk>_<fwd>"``.
      4. Copy ``<contigFilename>_Double.fasta`` to ``tmpWithDummy.fasta`` and
         call ``IORobot.extractGraphToContigs`` with "abunPre.fasta".
      5. Call ``nonRedundantResolver.removeRedundantWithFile`` with
         "abunPre", "abunMum" and "abun".

    NOTE: this function is defined a second time, with the same logic, later
    in this file; the later definition is the one bound at import time.

    Args:
        folderName: Directory prefix. It is concatenated directly with file
            names, so it must already end with a path separator.
        mummerLink: Forwarded unchanged to the helper routines.
        contigFilename: Contig file base name; "_Double.fasta" is appended.
        readsetFilename: Forwarded to ``generateGapContentLookup``.
        N1: Number of leading entries of ``G.graphNodesList`` to scan.
        contigReadGraph: Forwarded to ``generateGapContentLookup``.

    Returns:
        None. Results are written to files under ``folderName``.
    """
    # Function-scope import: the file-level import block is not visible here.
    import shutil

    # Context manager so the handle is closed even if json.load raises.
    with open(folderName + "mapDummyToRealDic.json", "r") as json_data:
        mapDummyToRealDic = json.load(json_data)

    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, "xResolvedGraph")

    gapContentLookUpDic = {}
    furtherGapList = []

    for i in range(N1):
        nodeIndexList = G.graphNodesList[i].nodeIndexList
        # range() is empty for lists shorter than two entries, so no separate
        # length guard is needed.
        for j in range(len(nodeIndexList) - 1):
            bk, fwd = nodeIndexList[j], nodeIndexList[j + 1]
            key = str(bk) + "_" + str(fwd)
            # The dictionary is still empty at this point, so this test
            # currently always passes; it is kept in case entries are
            # pre-seeded in the future.
            if key not in gapContentLookUpDic:
                furtherGapList.append([bk, fwd])

    with open(folderName + "furtherGapList.json", "w") as f:
        json.dump(furtherGapList, f)

    furtherGapContentLookUpList = generateGapContentLookup(
        folderName, mummerLink, furtherGapList, contigReadGraph,
        contigFilename, readsetFilename, mapDummyToRealDic)

    for eachitem in furtherGapContentLookUpList:
        key = str(eachitem[0]) + "_" + str(eachitem[1])
        gapContentLookUpDic[key] = [eachitem[2], eachitem[3], eachitem[4]]
        # Single-argument form prints the same text as the Python 2
        # statement ``print eachitem[2:4], len(eachitem[4])``.
        print("%s %s" % (eachitem[2:4], len(eachitem[4])))

    print("Final step: really hacking a file")

    doubleFastaName = contigFilename + "_Double.fasta"

    # Copy without going through a shell, so paths containing spaces or shell
    # metacharacters work and a failed copy raises instead of being ignored.
    shutil.copyfile(folderName + doubleFastaName, folderName + "tmpWithDummy.fasta")

    # NOTE(review): the returned contigs are not used below. The call is kept
    # because its side effects (if any) are not visible from this function;
    # it is likely removable.
    IORobot.readContigsFromFile(folderName, doubleFastaName)

    IORobot.extractGraphToContigs(
        G, folderName, mummerLink, "abunPre.fasta", "tmpWithDummy.fasta",
        gapContentLookUpDic, mapDummyToRealDic)

    nonRedundantResolver.removeRedundantWithFile(
        folderName, mummerLink, "abunPre", "abunMum", "abun")
def readContigForAbunSplit(folderName, mummerLink, contigFilename, readsetFilename, N1, contigReadGraph):
    """Fill the gaps along the resolved graph and extract the resulting contigs.

    Steps, in order:
      1. Load ``mapDummyToRealDic.json`` and the graph saved as
         "xResolvedGraph" from ``folderName``.
      2. Collect every adjacent ``[bk, fwd]`` pair found in the
         ``nodeIndexList`` of the first ``N1`` graph nodes and save the
         pairs to ``furtherGapList.json``.
      3. Pass the pairs to ``generateGapContentLookup`` and index its results
         by the key ``"<bk>_<fwd>"``.
      4. Copy ``<contigFilename>_Double.fasta`` to ``tmpWithDummy.fasta`` and
         call ``IORobot.extractGraphToContigs`` with "abunPre.fasta".
      5. Call ``nonRedundantResolver.removeRedundantWithFile`` with
         "abunPre", "abunMum" and "abun".

    NOTE: a function with this same name and logic is also defined earlier in
    this file; this later definition replaces it when the module is loaded.

    Args:
        folderName: Directory prefix. It is concatenated directly with file
            names, so it must already end with a path separator.
        mummerLink: Forwarded unchanged to the helper routines.
        contigFilename: Contig file base name; "_Double.fasta" is appended.
        readsetFilename: Forwarded to ``generateGapContentLookup``.
        N1: Number of leading entries of ``G.graphNodesList`` to scan.
        contigReadGraph: Forwarded to ``generateGapContentLookup``.

    Returns:
        None. Results are written to files under ``folderName``.
    """
    # Function-scope import: the file-level import block is not visible here.
    import shutil

    # Context manager so the handle is closed even if json.load raises.
    with open(folderName + "mapDummyToRealDic.json", "r") as json_data:
        mapDummyToRealDic = json.load(json_data)

    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, "xResolvedGraph")

    gapContentLookUpDic = {}
    furtherGapList = []

    for i in range(N1):
        nodeIndexList = G.graphNodesList[i].nodeIndexList
        # range() is empty for lists shorter than two entries, so no separate
        # length guard is needed.
        for j in range(len(nodeIndexList) - 1):
            bk, fwd = nodeIndexList[j], nodeIndexList[j + 1]
            key = str(bk) + "_" + str(fwd)
            # The dictionary is still empty at this point, so this test
            # currently always passes; it is kept in case entries are
            # pre-seeded in the future.
            if key not in gapContentLookUpDic:
                furtherGapList.append([bk, fwd])

    with open(folderName + "furtherGapList.json", "w") as f:
        json.dump(furtherGapList, f)

    furtherGapContentLookUpList = generateGapContentLookup(
        folderName, mummerLink, furtherGapList, contigReadGraph,
        contigFilename, readsetFilename, mapDummyToRealDic)

    for eachitem in furtherGapContentLookUpList:
        key = str(eachitem[0]) + "_" + str(eachitem[1])
        gapContentLookUpDic[key] = [eachitem[2], eachitem[3], eachitem[4]]
        # Single-argument form prints the same text as the Python 2
        # statement ``print eachitem[2:4], len(eachitem[4])``.
        print("%s %s" % (eachitem[2:4], len(eachitem[4])))

    print("Final step: really hacking a file")

    doubleFastaName = contigFilename + "_Double.fasta"

    # Copy without going through a shell, so paths containing spaces or shell
    # metacharacters work and a failed copy raises instead of being ignored.
    shutil.copyfile(folderName + doubleFastaName, folderName + "tmpWithDummy.fasta")

    # NOTE(review): the returned contigs are not used below. The call is kept
    # because its side effects (if any) are not visible from this function;
    # it is likely removable.
    IORobot.readContigsFromFile(folderName, doubleFastaName)

    IORobot.extractGraphToContigs(
        G, folderName, mummerLink, "abunPre.fasta", "tmpWithDummy.fasta",
        gapContentLookUpDic, mapDummyToRealDic)

    nonRedundantResolver.removeRedundantWithFile(
        folderName, mummerLink, "abunPre", "abunMum", "abun")
def continuousIntegration():
    """Run hand-toggled debugging experiments against hard-coded test folders.

    The body is a series of independent experiments, each guarded by a
    literal ``if False:`` / ``if True:`` switch. As written, only the last
    one is enabled: it calls ``nonRedundantResolver.removeRedundantWithFile``
    on "May11TestB/". Flip a guard to ``True`` to re-run that experiment.

    NOTE(review): the block nesting was reconstructed from a
    whitespace-collapsed source. Nesting among the disabled experiments does
    not affect behaviour, but confirm that the enabled final step belongs at
    function level.

    Takes no arguments and returns None.
    """
    # --- Experiment: BFS_revisit on a small hand-built graph ---------------
    if False:
        G = graphLib.seqGraph(10)
        for i in range(5):
            G.insertEdge(i, i + 1, 1997)
            G.insertEdge(i, i + 2, 1997)

        resultList = abunGraphLib.BFS_revisit(1, 3, G, 1)
        print("resultList %s" % (resultList,))

    # --- Experiment: formPathSeq on fixed direct/indirect paths ------------
    if False:
        folderName = "Apr10Test/"
        mummerPath = "/usr/bin/"
        directPathList = [[1, 486, 217], [1, 8642, 217], [1, 13465, 217]]
        indirectPathList = [[1, 486, 217]]
        contigFile = "improved3_Double.fasta"
        readFile = "phasingSeedName_Double.fasta"

        abunGraphLib.formPathSeq(folderName, mummerPath, directPathList,
                                 indirectPathList, contigFile, readFile)

        # Sub-experiment: build the contig reachability graph and reduce it.
        # It reads folderName and contigFile assigned just above.
        if False:
            lenDic = IORobot.obtainLength(folderName, contigFile)
            N1 = len(lenDic)
            print("N1 %s" % (N1,))

            G = graphLib.seqGraph(0)
            G.loadFromFile(folderName, "phaseStringGraph1")

            adj = [abunGraphLib.findAllReachable(i, N1, G) for i in range(N1)]

            Gnew = abunGraphLib.seqGraphDynamic(N1)
            for i in range(N1):
                for j in adj[i]:
                    Gnew.insertEdge(i, j, 1997)

            Gnew.initAdv()
            Gnew.doubleEdgeReduction()

            contigPaths = abunGraphLib.findAllPathK(1, 217, Gnew, 3)
            contigReadPaths = abunGraphLib.findAllPathK(1, 217, G, 5)

            print("contigPaths %s" % (contigPaths,))
            print("contigReadPaths %s" % (contigReadPaths,))

            Gnew.transitiveReduction()

    # --- Experiment: decideCut ---------------------------------------------
    if False:
        toDelete = abunGraphLib.decideCut("Apr10Test/", "/usr/bin/")
        print(toDelete)

    # --- Experiment: inspect xResolvedGraph against length/count tables ----
    if False:
        G = graphLib.seqGraph(0)
        G.loadFromFile("Apr10TestA/", "xResolvedGraph")

        # Optional dump of every node that still has a nodeIndexList.
        if False:
            for i, v in enumerate(G.graphNodesList):
                if len(v.nodeIndexList) > 0:
                    print("%s %s %s" % (i, v.listOfPrevNodes, v.listOfNextNodes))

        G.reportEdge()
        lenDic = IORobot.obtainLength("Apr10TestA/", "improved3_Double.fasta")
        mylist = [401, 207, 405, 407, 344]

        # Context manager so the handle is not leaked.
        with open("Apr10TestA/" + "myCountDic.json", "r") as json_data:
            myCountDic = json.load(json_data)

        for x in mylist:
            # Floor division keeps the integer index in the key on both
            # Python 2 and Python 3 (plain "/" yields a float on Python 3).
            half = x // 2
            print("%s %s %s" % (x,
                                lenDic["Contig" + str(half) + "_p"],
                                myCountDic["Segkk" + str(half)]))

    # --- Experiment: compare node count with the dummy-to-real map ---------
    if False:
        folderName = "Apr10TestA/"
        G = graphLib.seqGraph(0)
        G.loadFromFile(folderName, "xResolvedGraph")

        with open(folderName + "mapDummyToRealDic.json", "r") as json_data:
            mapDummyToRealDic = json.load(json_data)

        lenDic = IORobot.obtainLength(folderName, "improved3_Double.fasta")
        print(len(G.graphNodesList))
        print(len(mapDummyToRealDic))

        print("%s %s %s" % ("fake N1 , real N1 ",
                            len(G.graphNodesList) - len(mapDummyToRealDic),
                            len(lenDic)))

    # --- Experiment: full abunSplitter flow --------------------------------
    if False:
        abunSplitter.mainFlow("Apr10TestB/", "/usr/bin/")

    # --- Experiment: removeEmbedded ----------------------------------------
    if False:
        nonRedundantResolver.removeEmbedded("Apr10TestD/", "/usr/bin/")

    # --- Experiment: filter the "adaptor skipped" case ---------------------
    if False:
        folderName, contigReadGraph = "Apr10TestA/", "phaseStringGraph1"
        G = graphLib.seqGraph(0)
        kthres, edgeThres = 3, 1
        G.loadFromFile(folderName, contigReadGraph)
        lenDic = IORobot.obtainLength(folderName, "improved3_Double.fasta")

        N1 = len(lenDic)

        # Keep j in adj[i] only if findAllPathK returns at least edgeThres
        # paths from i to j.
        adj = [[] for i in range(N1)]
        for i in range(N1):
            tmpList = abunGraphLib.findAllReachable(i, N1, G)
            for j in tmpList:
                if len(abunGraphLib.findAllPathK(i, j, G, kthres)) >= edgeThres:
                    adj[i].append(j)

        ### Filter adaptor skipped case

        # Indices are treated as pairs (2k, 2k + 1); a link from an index to
        # its own partner is removed and remembered.
        adaptorPair = []
        for i, reachable in enumerate(adj):
            partner = i + 1 if i % 2 == 0 else i - 1
            if partner in reachable:
                reachable.remove(partner)
                adaptorPair.append([i, partner])

        Gnew = abunGraphLib.seqGraphDynamic(N1)
        for i in range(N1):
            for j in adj[i]:
                Gnew.insertEdge(i, j, 1997)

        for eachpair in adaptorPair:
            u, v = eachpair[0], eachpair[1]
            for x in Gnew.graphNodesList[u].listOfPrevNodes:
                xIndex = x[0]
                Gnew.removeEdge(xIndex, v)
            for y in Gnew.graphNodesList[v].listOfNextNodes:
                yIndex = y[0]
                Gnew.removeEdge(u, yIndex)

        # Count (and print) nodes with exactly two predecessors and two
        # successors.
        count2 = 0
        for i, node in enumerate(Gnew.graphNodesList):
            if len(node.listOfPrevNodes) == 2 and len(node.listOfNextNodes) == 2:
                count2 = count2 + 1
                print(str(i) + "{color:red}")

        print("%s %s" % ("count2, ", count2))

        ### End filter adaptor skipped case

    # --- Enabled step: redundancy removal on May11TestB ---------------------
    if True:
        nonRedundantResolver.removeRedundantWithFile(
            "May11TestB/", "/usr/bin/", "abun", "abunDebug", "abunNoEmbed")