def adjListToRepeatList(newAdjacencyList, folderName, repeatFilename):
    """Cluster an adjacency list into repeat groups and save them as JSON.

    Every node ``i`` of the adjacency list is represented by two vertices
    of an auxiliary graph: the even vertex ``2 * i`` and the odd vertex
    ``2 * i + 1``.  Each edge ``i -> j`` connects vertex ``2 * i`` with
    vertex ``2 * j + 1`` (in both directions, weight 1).  The connected
    components of that graph are then split into their even and odd
    vertex ids, giving one ``[evenIds, oddIds]`` entry per component.

    Args:
        newAdjacencyList: sequence whose entry ``i`` lists the successors
            of node ``i``.
        folderName: path prefix of the output file.  It is concatenated
            with ``repeatFilename`` as-is, so it presumably has to end
            with a path separator -- confirm against callers.
        repeatFilename: name of the JSON file to write.

    Returns:
        None.  The result is only written to
        ``folderName + repeatFilename``.

    Raises:
        AssertionError: if the file read back differs from what was
            written.
    """
    N1 = len(newAdjacencyList)
    G2 = abunGraphLib.seqGraphWt(N1 * 2)

    for i, successors in enumerate(newAdjacencyList):
        for j in successors:
            G2.insertEdge(2 * i, 2 * j + 1, 1)
            G2.insertEdge(2 * j + 1, 2 * i, 1)

    repeatList = []
    for cluster in G2.findConnectedComponents():
        leftList = [vertex for vertex in cluster if vertex % 2 == 0]
        rightList = [vertex for vertex in cluster if vertex % 2 != 0]
        repeatList.append([
            abunHouseKeeper.getDistinct(leftList),
            abunHouseKeeper.getDistinct(rightList),
        ])

    outputPath = folderName + repeatFilename
    with open(outputPath, 'w') as outfile:
        json.dump(repeatList, outfile)

    # Round-trip sanity check.  The handle used for reading back was
    # previously left open; the context manager closes it deterministically.
    with open(outputPath, 'r') as infile:
        loadData = json.load(infile)
    assert (loadData == repeatList)
def formConfirmReadResolve(folderName, inList, outList, G, Grev, N1): #print "formConfirmReadResolve" resolvedList = [] confirmingReadList = [] brLFlankList = [] brRFlankList = [] ### Find possible candidate reads print "inList , outList formConfirmReadResolve()", inList, outList for eachin in inList: for eachout in outList: pathList = abunGraphLib.findAllPathK(eachin, eachout, G, 3) for path in pathList: if len(path) == 3 and path[1] >= N1: R = path[1] confirmingReadList.append(R) brLFlankList.append([eachin, R]) brRFlankList.append([eachout, R]) ### Filter simple false cases toUseReadDic = {} confirmingReadList.sort() for key, items in groupby(confirmingReadList): toUseReadDic[str(key)] = True newbrLFlankList = abunHouseKeeper.getDistinct(brLFlankList) newbrLFlankList.sort(key=itemgetter(1)) for key, items in groupby(newbrLFlankList, itemgetter(1)): mylist = list(items) if len(mylist) > 1: toUseReadDic[str(key)] = False newbrRFlankList = abunHouseKeeper.getDistinct(brRFlankList) newbrRFlankList.sort(key=itemgetter(1)) for key, items in groupby(newbrRFlankList, itemgetter(1)): mylist = list(items) if len(mylist) > 1: toUseReadDic[str(key)] = False finalSearchReadList = [] for eachitem in toUseReadDic: if toUseReadDic[eachitem] == True: finalSearchReadList.append(int(eachitem)) ### Check paths to confirm all false cases for eachR in finalSearchReadList: l1 = abunGraphLib.findAllReachable(eachR, N1, G) l2 = abunGraphLib.findAllReachable(eachR, N1, Grev) l1Distinct = abunHouseKeeper.getDistinct(l1) l2Distinct = abunHouseKeeper.getDistinct(l2) if len(l1Distinct) == 1 and len(l2Distinct) == 1: c1, c2 = l1Distinct[0], l2Distinct[0] resolvedList.append([c2, c1]) return resolvedList
def adjListToRepeatList(newAdjacencyList, folderName, repeatFilename):
    """Group the nodes of an adjacency list into repeat clusters (JSON output).

    An auxiliary graph with ``2 * len(newAdjacencyList)`` vertices is
    built: node ``i`` owns the even vertex ``2 * i`` and the odd vertex
    ``2 * i + 1``, and each edge ``i -> j`` joins ``2 * i`` and
    ``2 * j + 1`` in both directions with weight 1.  Every connected
    component is reported as ``[evenIds, oddIds]``.

    Args:
        newAdjacencyList: sequence whose entry ``i`` lists the successors
            of node ``i``.
        folderName: output path prefix, concatenated directly with
            ``repeatFilename`` (presumably ends with a separator --
            confirm against callers).
        repeatFilename: name of the JSON file to write.

    Returns:
        None.  The clusters are written to ``folderName + repeatFilename``.

    Raises:
        AssertionError: if reading the file back does not reproduce the
            data that was written.
    """
    N1 = len(newAdjacencyList)
    G2 = abunGraphLib.seqGraphWt(N1 * 2)

    for i in range(N1):
        for j in newAdjacencyList[i]:
            G2.insertEdge(2 * i, 2 * j + 1, 1)
            G2.insertEdge(2 * j + 1, 2 * i, 1)

    clusters = G2.findConnectedComponents()

    repeatList = []
    for eachitem in clusters:
        leftList, rightList = [], []
        for eachsubitem in eachitem:
            if eachsubitem % 2 == 0:
                leftList.append(eachsubitem)
            else:
                rightList.append(eachsubitem)
        repeatList.append([abunHouseKeeper.getDistinct(leftList),
                           abunHouseKeeper.getDistinct(rightList)])

    with open(folderName + repeatFilename, 'w') as outfile:
        json.dump(repeatList, outfile)

    # Verify the round trip.  The file is opened in a ``with`` block so the
    # read handle is closed (it used to be leaked).
    with open(folderName + repeatFilename, 'r') as json_data:
        loadData = json.load(json_data)
    assert (loadData == repeatList)
def findAttachedReads(x, side, folderName, sortedContigList, sortedContigDic,
                      lenDicContig, lenDicRead):
    """Return the distinct read names whose alignment to contig ``x`` passes ``overlapCR``.

    Alignment records are laid out as in this sample:

        [S1] [E1] | [S2] [E2] | [LEN 1] [LEN 2] | [ IDY] | [TAGS]
        =================================================================
           1  562 |  819 1418 |     562     600 |  84.72 | Contig0_d Read121_d
           1  562 | 4077 3478 |     562     600 |  84.72 | Contig0_d Read121_p
           1  564 |  656   68 |     564     589 |  90.13 | Contig0_d Read382_d
           1  564 | 6996 7584 |     564     589 |  90.13 | Contig0_d Read382_p
           1  571 | 1386  815 |     571     572 |  86.60 | Contig0_d Read421_d

    The last two fields of a record are used as contig name (``[-2]``)
    and read name (``[-1]``).

    Args:
        x: contig id, converted with
            ``abunHouseKeeper.parseIDToName(x, 'C', 0)``.
        side: forwarded unchanged to ``overlapCR``.
        folderName: unused here; kept for interface compatibility.
        sortedContigList: alignment records; the scan assumes records of
            the same contig are contiguous.
        sortedContigDic: maps a contig name to the index of its first
            record in ``sortedContigList``.
        lenDicContig: forwarded to ``overlapCR``.
        lenDicRead: forwarded to ``overlapCR``.

    Returns:
        ``abunHouseKeeper.getDistinct`` of the accepted read names, or
        ``[]`` when the contig has no records.
    """
    thres = thresMiddleContig
    key = abunHouseKeeper.parseIDToName(x, 'C', 0)

    if key not in sortedContigDic:
        return []

    attached = []
    cursor = sortedContigDic[key]
    # Walk the contiguous run of records that belong to this contig.
    while cursor < len(sortedContigList):
        record = sortedContigList[cursor]
        if not (record[-2] == key):
            break
        if overlapCR(record, side, thres, lenDicContig, lenDicRead):
            attached.append(record[-1])
        cursor += 1

    return abunHouseKeeper.getDistinct(attached)
def filterConfidResolve(resolvedList): newResolvedList = [] resolvedList.sort() conThres = abunHouseKeeper.abunGlobalSplitParameterRobot.BRThres print "conThres", conThres for key, items in groupby(resolvedList): tmpList = list(items) if len(tmpList) >= conThres: newResolvedList.append(key) if False: noConflict = resolveConflict(abunHouseKeeper.getDistinct(resolvedList)) noConflict = abunHouseKeeper.getDistinct(noConflict) newResolvedList = abunHouseKeeper.getDistinct(newResolvedList) newResolvedList = abunHouseKeeper.getDistinct(intersect(newResolvedList, noConflict)) return newResolvedList
def resolveConflictX(listA, listB): resolvedList = [[] for i in range(len(listA))] print "len(listA), len(listB)", len(listA), len(listB) for i in range(len(listA)): combinedList = listA[i] + listB[i] newCombinedList = abunHouseKeeper.getDistinct(combinedList) tmpResolved = resolveConflict(newCombinedList) resolvedList[i] = tmpResolved return resolvedList
def filterConfidResolve(resolvedList): newResolvedList = [] resolvedList.sort() conThres = abunHouseKeeper.abunGlobalSplitParameterRobot.BRThres print "conThres", conThres for key, items in groupby(resolvedList): tmpList = list(items) if len(tmpList) >= conThres: newResolvedList.append(key) if False: noConflict = resolveConflict(abunHouseKeeper.getDistinct(resolvedList)) noConflict = abunHouseKeeper.getDistinct(noConflict) newResolvedList = abunHouseKeeper.getDistinct(newResolvedList) newResolvedList = abunHouseKeeper.getDistinct( intersect(newResolvedList, noConflict)) return newResolvedList
def findAttachedContigs(rList, side, folderName, sortedReadList, sortedReadDic,
                        lenDicContig, lenDicRead):
    """Collect distinct ``[contig, read]`` pairs accepted by ``overlapCRJustREnd``.

    For every read in ``rList`` that has an entry in ``sortedReadDic``,
    the contiguous run of its records in ``sortedReadList`` is scanned.
    The last field of a record is compared with the read, and the
    second-to-last field is reported as the contig.

    Args:
        rList: reads to look up.
        side: forwarded unchanged to ``overlapCRJustREnd``.
        folderName: unused here; kept for interface compatibility.
        sortedReadList: alignment records; records of the same read are
            assumed to be contiguous.
        sortedReadDic: maps a read to the index of its first record.
        lenDicContig: forwarded to ``overlapCRJustREnd``.
        lenDicRead: forwarded to ``overlapCRJustREnd``.

    Returns:
        ``abunHouseKeeper.getDistinct`` of the accepted
        ``[contig, read]`` pairs.
    """
    thres = thresMiddleContig
    pairs = []

    for read in rList:
        if read not in sortedReadDic:
            continue

        position = sortedReadDic[read]
        # Scan until the records stop referring to this read.
        while position < len(sortedReadList):
            record = sortedReadList[position]
            if not (record[-1] == read):
                break
            if overlapCRJustREnd(record, side, thres, lenDicContig, lenDicRead):
                pairs.append([record[-2], read])
            position += 1

    return abunHouseKeeper.getDistinct(pairs)
def findAttachedContigs(rList, side, folderName, sortedReadList, sortedReadDic,
                        lenDicContig, lenDicRead):
    """Find the contigs attached to the given reads.

    Each read found in ``sortedReadDic`` points at the first of its
    records in ``sortedReadList``.  Records are visited while their last
    field equals the read; a record that satisfies
    ``overlapCRJustREnd`` contributes the pair
    ``[record[-2], read]``.

    Args:
        rList: reads to look up.
        side: forwarded unchanged to ``overlapCRJustREnd``.
        folderName: unused here; kept for interface compatibility.
        sortedReadList: alignment records grouped by read.
        sortedReadDic: maps a read to the index of its first record.
        lenDicContig: forwarded to ``overlapCRJustREnd``.
        lenDicRead: forwarded to ``overlapCRJustREnd``.

    Returns:
        The distinct ``[contig, read]`` pairs, as produced by
        ``abunHouseKeeper.getDistinct``.
    """
    thres = thresMiddleContig
    found = []

    knownReads = [r for r in rList if r in sortedReadDic]
    for r in knownReads:
        idx = sortedReadDic[r]
        while idx < len(sortedReadList) and sortedReadList[idx][-1] == r:
            hit = sortedReadList[idx]
            idx = idx + 1
            if overlapCRJustREnd(hit, side, thres, lenDicContig, lenDicRead):
                found.append([hit[-2], r])

    return abunHouseKeeper.getDistinct(found)
def findAttachedReads(x, side, folderName, sortedContigList, sortedContigDic,
                      lenDicContig, lenDicRead):
    """List the distinct reads attached to contig ``x``.

    Sample of the alignment records that are scanned:

        [S1] [E1] | [S2] [E2] | [LEN 1] [LEN 2] | [ IDY] | [TAGS]
        =================================================================
           1  562 |  819 1418 |     562     600 |  84.72 | Contig0_d Read121_d
           1  562 | 4077 3478 |     562     600 |  84.72 | Contig0_d Read121_p
           1  564 |  656   68 |     564     589 |  90.13 | Contig0_d Read382_d
           1  564 | 6996 7584 |     564     589 |  90.13 | Contig0_d Read382_p
           1  571 | 1386  815 |     571     572 |  86.60 | Contig0_d Read421_d

    A record's second-to-last field is matched against the contig name
    and its last field is reported as the read name.

    Args:
        x: contig id; turned into a name with
            ``abunHouseKeeper.parseIDToName(x, 'C', 0)``.
        side: forwarded unchanged to ``overlapCR``.
        folderName: unused here; kept for interface compatibility.
        sortedContigList: alignment records grouped by contig.
        sortedContigDic: maps a contig name to the index of its first
            record.
        lenDicContig: forwarded to ``overlapCR``.
        lenDicRead: forwarded to ``overlapCR``.

    Returns:
        Distinct read names (via ``abunHouseKeeper.getDistinct``) whose
        record passes ``overlapCR``; ``[]`` if the contig is unknown.
    """
    thres = thresMiddleContig
    contigName = abunHouseKeeper.parseIDToName(x, 'C', 0)

    if contigName in sortedContigDic:
        reads = []
        idx = sortedContigDic[contigName]
        while idx < len(sortedContigList) and sortedContigList[idx][-2] == contigName:
            hit = sortedContigList[idx]
            idx = idx + 1
            if overlapCR(hit, side, thres, lenDicContig, lenDicRead):
                reads.append(hit[-1])
        result = abunHouseKeeper.getDistinct(reads)
    else:
        result = []

    return result
def abunSplitWithXResolve(folderName, mummerLink, myCountDic, contigReadGraph, contigFilename, readsetFilename):
    """Build and report the contig reachability graph; later stages are disabled.

    The only code that runs loads ``contigReadGraph`` from ``folderName``,
    computes ``abunGraphLib.findAllReachable`` for each of the first
    ``N1 = 2 * len(myCountDic)`` nodes, copies the result into a new
    ``graphLib.seqGraph`` and calls ``reportEdge`` on it.

    Four further stages (repeat resolution, gap-content lookup, resolved
    graph construction, final FASTA extraction) are each wrapped in
    ``if False:`` and therefore never execute.

    NOTE(review): the source this was restored from had lost its
    indentation; the extent of each ``if False:`` block was reconstructed
    from data dependencies and should be confirmed against version
    control.

    Returns:
        None.
    """
    N1 = len(myCountDic) * 2
    print "N1", N1

    # Debug: contig-level reachability graph derived from the contig-read graph.
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)
    adj = [[] for i in range(N1)]
    for i in range(N1):
        adj[i] = abunGraphLib.findAllReachable(i, N1, G)
    Gnew = graphLib.seqGraph(N1)
    for i in range(N1):
        for j in adj[i]:
            Gnew.insertEdge(i, j, 1)
    Gnew.reportEdge()
    # End Debug

    # Disabled stage 1: resolve repeats and persist the result.
    if False:
        json_data = open(folderName + "phaseRepeat.txt", 'r')
        repeatPairs = json.load(json_data)
        repeatPairs = obtainNonEmpty(repeatPairs)
        biResolvedCombineList = []
        for eachitem in repeatPairs:
            inList, outList = eachitem[0], eachitem[1]
            resolvedList = determindMatch(inList, outList, myCountDic, folderName, contigReadGraph, N1)
            biResolvedCombineList += resolvedList

        ### X-node repeat resolution
        xResolvedList, mapDummyToRealDic = xNodeResolving(folderName, contigReadGraph)

        ### Combine both resolutions
        resolvedList = xResolvedList + biResolvedCombineList
        resolvedList = abunHouseKeeper.getDistinct(resolvedList)
        print "resolvedList, len(resolvedList),len(xResolvedList), len(biResolvedCombineList) ", resolvedList, len(resolvedList), len(xResolvedList), len(biResolvedCombineList)
        with open(folderName + "resolvedList.json", 'w') as f:
            json.dump(resolvedList, f)
        with open(folderName + "mapDummyToRealDic.json", 'w') as f:
            json.dump(mapDummyToRealDic, f)

    # Disabled stage 2: build the gap-content lookup keyed by "<a>_<b>".
    if False:
        json_data = open(folderName + "resolvedList.json", 'r')
        resolvedList = json.load(json_data)
        json_data = open(folderName + "mapDummyToRealDic.json", 'r')
        mapDummyToRealDic = json.load(json_data)
        gapContentLookUpList = []
        gapContentLookUpList = generateGapContentLookup(folderName, mummerLink, resolvedList, contigReadGraph, contigFilename, readsetFilename, mapDummyToRealDic)
        gapContentLookUpDic = {}
        gapContentLookUpList.sort()
        for eachitem in gapContentLookUpList:
            gapContentLookUpDic[str(eachitem[0]) + "_" + str(eachitem[1])] = [eachitem[2], eachitem[3], eachitem[4]]
            print eachitem[2:4], len(eachitem[4])
        with open(folderName + "gapContentLookUpDic.json", 'w') as f:
            json.dump(gapContentLookUpDic, f)

    # Disabled stage 3: build, condense and save the resolved graph.
    if False:
        json_data = open(folderName + "resolvedList.json", 'r')
        resolvedList = json.load(json_data)
        json_data = open(folderName + "mapDummyToRealDic.json", 'r')
        mapDummyToRealDic = json.load(json_data)
        G = graphLib.seqGraph(N1 + len(mapDummyToRealDic))
        addEdges(G, resolvedList)
        G.condense()
        G.saveToFile(folderName, "xResolvedGraph")

    # Disabled stage 4: append dummy segments to a copy of the contig FASTA
    # and extract the final contigs from the resolved graph.
    if False:
        json_data = open(folderName + "mapDummyToRealDic.json", 'r')
        mapDummyToRealDic = json.load(json_data)
        G = graphLib.seqGraph(0)
        G.loadFromFile(folderName, "xResolvedGraph")
        json_data = open(folderName + "gapContentLookUpDic.json", 'r')
        gapContentLookUpDic = json.load(json_data)
        print "Final step: really hacking a file"
        # NOTE(review): shell command built by string concatenation; paths
        # containing spaces or shell metacharacters would break it.
        os.system("cp " + folderName + contigFilename + "_Double.fasta " + folderName + "tmpWithDummy.fasta")
        contigList = IORobot.readContigsFromFile(folderName, contigFilename + "_Double.fasta")
        f = open(folderName + "tmpWithDummy.fasta", 'a')
        for i in range(len(mapDummyToRealDic)):
            id = mapDummyToRealDic[str(i)]
            f.write(">SegDum" + str(i) + "\n")
            f.write(contigList[id] + "\n")
        f.close()
        IORobot.extractGraphToContigs(G, folderName, mummerLink, "abun.fasta", "tmpWithDummy.fasta", gapContentLookUpDic, mapDummyToRealDic)
def identifyRepeat(folderName, mummerLink,contigFilename,contigReadGraph, repeatFilename, optionToRun ): ''' Input : Graph --- phaseStringGraph1 Output: repeat pairs { [ (1,2), (3,4) ] , [(5,6),(7,8)] } Algorithm: a) Reachability test on the graph to find the partners b) Form Bipartite graph c) Find connected component in the bipartite and define as repeat pairs ''' # ## (a) reachability test to find partners G = graphLib.seqGraph(0) G.loadFromFile(folderName, contigReadGraph) # G.reportEdge() lenDicCC = IORobot.obtainLength(folderName, contigFilename+"_Double.fasta") adjacencyList = [[] for i in range(len(lenDicCC))] N1 = len(lenDicCC) # # Debug # for i in range(14): # debugGraphPath(i, 2, G, N1) # # End Debug for i in range(len(lenDicCC)): adjacencyList[i] = abunGraphLib.findAllReachable(i, N1, G) print "i, adjacencyList[i] : ", i , adjacencyList[i] # ## (b) formation of bipartite graph if optionToRun == "tandem" : newAdjacencyList = adjacencyList elif optionToRun == "xphase": newAdjacencyList = abunGraphLib.filterEdge(adjacencyList, folderName, contigFilename) G2 = abunGraphLib.seqGraphWt(N1 * 2) for i in range(N1): for j in newAdjacencyList[i]: G2.insertEdge(2 * i, 2 * j + 1, 1) G2.insertEdge(2 * j + 1, 2 * i, 1) clusters = G2.findConnectedComponents() repeatList = [] for eachitem in clusters: leftList, rightList = [], [] for eachsubitem in eachitem: if eachsubitem % 2 == 0 : leftList.append(eachsubitem) else: rightList.append(eachsubitem) repeatList.append([abunHouseKeeper.getDistinct(leftList), abunHouseKeeper.getDistinct(rightList)]) with open(folderName + repeatFilename, 'w') as outfile: json.dump(repeatList, outfile) json_data = open(folderName + repeatFilename, 'r') loadData = json.load(json_data) assert(loadData == repeatList)
def identifyRepeat(folderName, mummerLink, contigFilename, contigReadGraph, repeatFilename, optionToRun): ''' Input : Graph --- phaseStringGraph1 Output: repeat pairs { [ (1,2), (3,4) ] , [(5,6),(7,8)] } Algorithm: a) Reachability test on the graph to find the partners b) Form Bipartite graph c) Find connected component in the bipartite and define as repeat pairs ''' # ## (a) reachability test to find partners G = graphLib.seqGraph(0) G.loadFromFile(folderName, contigReadGraph) # G.reportEdge() lenDicCC = IORobot.obtainLength(folderName, contigFilename + "_Double.fasta") adjacencyList = [[] for i in range(len(lenDicCC))] N1 = len(lenDicCC) # # Debug # for i in range(14): # debugGraphPath(i, 2, G, N1) # # End Debug for i in range(len(lenDicCC)): adjacencyList[i] = abunGraphLib.findAllReachable(i, N1, G) print "i, adjacencyList[i] : ", i, adjacencyList[i] # ## (b) formation of bipartite graph if optionToRun == "tandem": newAdjacencyList = adjacencyList elif optionToRun == "xphase": newAdjacencyList = abunGraphLib.filterEdge(adjacencyList, folderName, contigFilename) G2 = abunGraphLib.seqGraphWt(N1 * 2) for i in range(N1): for j in newAdjacencyList[i]: G2.insertEdge(2 * i, 2 * j + 1, 1) G2.insertEdge(2 * j + 1, 2 * i, 1) clusters = G2.findConnectedComponents() repeatList = [] for eachitem in clusters: leftList, rightList = [], [] for eachsubitem in eachitem: if eachsubitem % 2 == 0: leftList.append(eachsubitem) else: rightList.append(eachsubitem) repeatList.append([ abunHouseKeeper.getDistinct(leftList), abunHouseKeeper.getDistinct(rightList) ]) with open(folderName + repeatFilename, 'w') as outfile: json.dump(repeatList, outfile) json_data = open(folderName + repeatFilename, 'r') loadData = json.load(json_data) assert (loadData == repeatList)
def BResolution(Gnew, folderName, contigReadGraph, N1, myCountDic, lenDic): if abunHouseKeeper.abunGlobalSplitParameterRobot.runBResolve: print "abunHouseKeeper.abunGlobalSplitParameterRobot.runBResolve", abunHouseKeeper.abunGlobalSplitParameterRobot.runBResolve maxRThres = abunHouseKeeper.abunGlobalSplitParameterRobot.RThres repeatFinder.adjListToRepeatList(Gnew.adj, folderName, "phaseRepeatTR.txt") json_data = open(folderName + "phaseRepeatTR.txt", "r") repeatPairs = json.load(json_data) repeatPairs = obtainNonEmpty(repeatPairs) biResolvedCombineList = [] G = graphLib.seqGraph(0) G.loadFromFile(folderName, contigReadGraph) Grev = abunGraphLib.formReverseGraphFast(G) abunAnalysisList = [] for eachitem in repeatPairs: inList, outList = eachitem[0], eachitem[1] resolvedList, brResolvedList = [], [] if abunHouseKeeper.abunGlobalSplitParameterRobot.toRunAbunB: if abunHouseKeeper.abunGlobalSplitParameterRobot.AbunLowerB > 0: abunHouseKeeper.abunGlobalSplitParameterRobot.AbunLower = ( abunHouseKeeper.abunGlobalSplitParameterRobot.AbunLowerB ) if abunHouseKeeper.abunGlobalSplitParameterRobot.AbunUpperB > 0: abunHouseKeeper.abunGlobalSplitParameterRobot.AbunUpper = ( abunHouseKeeper.abunGlobalSplitParameterRobot.AbunUpperB ) if not abunHouseKeeper.abunGlobalSplitParameterRobot.toRunAggB: resolvedList = determindMatch(inList, outList, myCountDic, folderName, contigReadGraph, N1) else: resolvedList = determindMatchAggregate( inList, outList, myCountDic, folderName, contigReadGraph, N1, Gnew, lenDic ) if abunHouseKeeper.abunGlobalSplitParameterRobot.toRunBRB: if abunHouseKeeper.abunGlobalSplitParameterRobot.BRThresB > 0: abunHouseKeeper.abunGlobalSplitParameterRobot.BRThres = ( abunHouseKeeper.abunGlobalSplitParameterRobot.BRThresB ) brResolvedList = formBRReolve(folderName, inList, outList, G, Grev, True, N1) combinedList = abunHouseKeeper.getDistinct(resolvedList + brResolvedList) print "resolvedList, brResolvedList, inList, outList", resolvedList, brResolvedList, inList, 
outList print "resolveConflict(combinedList)", resolveConflict(combinedList) abunAnalysisList.append([inList, outList, resolvedList, brResolvedList, resolveConflict(combinedList)]) if len(inList) <= maxRThres and len(outList) <= maxRThres and len(inList) > 0 and len(outList) > 0: # biResolvedCombineList += resolveConflict(combinedList) resolvedCombine = resolveConflict(combinedList) ### kkdebug Gnew.bipartiteLocalResolve(resolvedCombine, inList, outList, folderName) # json_data = open(folderName + "hackBRResolveList.json", 'r') # dataItem = json.load(json_data) # Gnew.bipartiteResolve(dataItem) ### end kkdebug Gnew.condense() with open(folderName + "biResolvedCombineList.json", "w") as f: json.dump(biResolvedCombineList, f) with open(folderName + "abunAnalysisList.json", "w") as f: json.dump(abunAnalysisList, f) # assert(1==2) return Gnew else: return Gnew
def abunSplitWithXResolve(folderName, mummerLink, myCountDic, contigReadGraph, contigFilename, readsetFilename):
    """Report the contig reachability graph; the resolution pipeline is switched off.

    Active part: load ``contigReadGraph`` from ``folderName``, compute
    ``abunGraphLib.findAllReachable`` for every node below
    ``N1 = 2 * len(myCountDic)``, rebuild those edges in a fresh
    ``graphLib.seqGraph`` and call ``reportEdge`` on it.

    Inactive part: four stages guarded by ``if False:`` (repeat
    resolution, gap-content lookup, resolved-graph construction, final
    contig extraction).  None of them runs.

    NOTE(review): indentation was reconstructed from a collapsed source;
    verify which statements belong to each ``if False:`` block.

    Returns:
        None.
    """
    N1 = len(myCountDic) * 2
    print "N1", N1

    # Debug: contig-to-contig reachability extracted from the contig-read graph.
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)
    adj = [[] for i in range(N1)]
    for i in range(N1):
        adj[i] = abunGraphLib.findAllReachable(i, N1, G)
    Gnew = graphLib.seqGraph(N1)
    for i in range(N1):
        for j in adj[i]:
            Gnew.insertEdge(i, j, 1)
    Gnew.reportEdge()
    # End Debug

    # Disabled stage 1: compute and store the resolved pairs.
    if False:
        json_data = open(folderName + "phaseRepeat.txt", "r")
        repeatPairs = json.load(json_data)
        repeatPairs = obtainNonEmpty(repeatPairs)
        biResolvedCombineList = []
        for eachitem in repeatPairs:
            inList, outList = eachitem[0], eachitem[1]
            resolvedList = determindMatch(inList, outList, myCountDic, folderName, contigReadGraph, N1)
            biResolvedCombineList += resolvedList

        ### X-node repeat resolution
        xResolvedList, mapDummyToRealDic = xNodeResolving(folderName, contigReadGraph)

        ### Combine both resolutions
        resolvedList = xResolvedList + biResolvedCombineList
        resolvedList = abunHouseKeeper.getDistinct(resolvedList)
        print "resolvedList, len(resolvedList),len(xResolvedList), len(biResolvedCombineList) ", resolvedList, len(
            resolvedList
        ), len(xResolvedList), len(biResolvedCombineList)
        with open(folderName + "resolvedList.json", "w") as f:
            json.dump(resolvedList, f)
        with open(folderName + "mapDummyToRealDic.json", "w") as f:
            json.dump(mapDummyToRealDic, f)

    # Disabled stage 2: gap-content lookup keyed by "<a>_<b>".
    if False:
        json_data = open(folderName + "resolvedList.json", "r")
        resolvedList = json.load(json_data)
        json_data = open(folderName + "mapDummyToRealDic.json", "r")
        mapDummyToRealDic = json.load(json_data)
        gapContentLookUpList = []
        gapContentLookUpList = generateGapContentLookup(
            folderName, mummerLink, resolvedList, contigReadGraph, contigFilename, readsetFilename, mapDummyToRealDic
        )
        gapContentLookUpDic = {}
        gapContentLookUpList.sort()
        for eachitem in gapContentLookUpList:
            gapContentLookUpDic[str(eachitem[0]) + "_" + str(eachitem[1])] = [eachitem[2], eachitem[3], eachitem[4]]
            print eachitem[2:4], len(eachitem[4])
        with open(folderName + "gapContentLookUpDic.json", "w") as f:
            json.dump(gapContentLookUpDic, f)

    # Disabled stage 3: build, condense and save the resolved graph.
    if False:
        json_data = open(folderName + "resolvedList.json", "r")
        resolvedList = json.load(json_data)
        json_data = open(folderName + "mapDummyToRealDic.json", "r")
        mapDummyToRealDic = json.load(json_data)
        G = graphLib.seqGraph(N1 + len(mapDummyToRealDic))
        addEdges(G, resolvedList)
        G.condense()
        G.saveToFile(folderName, "xResolvedGraph")

    # Disabled stage 4: extend a copy of the contig FASTA with dummy
    # segments and extract the final contigs.
    if False:
        json_data = open(folderName + "mapDummyToRealDic.json", "r")
        mapDummyToRealDic = json.load(json_data)
        G = graphLib.seqGraph(0)
        G.loadFromFile(folderName, "xResolvedGraph")
        json_data = open(folderName + "gapContentLookUpDic.json", "r")
        gapContentLookUpDic = json.load(json_data)
        print "Final step: really hacking a file"
        # NOTE(review): the shell command is assembled by concatenation, so
        # paths with spaces or shell metacharacters would break it.
        os.system("cp " + folderName + contigFilename + "_Double.fasta " + folderName + "tmpWithDummy.fasta")
        contigList = IORobot.readContigsFromFile(folderName, contigFilename + "_Double.fasta")
        f = open(folderName + "tmpWithDummy.fasta", "a")
        for i in range(len(mapDummyToRealDic)):
            id = mapDummyToRealDic[str(i)]
            f.write(">SegDum" + str(i) + "\n")
            f.write(contigList[id] + "\n")
        f.close()
        IORobot.extractGraphToContigs(
            G, folderName, mummerLink, "abun.fasta", "tmpWithDummy.fasta", gapContentLookUpDic, mapDummyToRealDic
        )
def formConfirmReadResolve(folderName, inList, outList, G, Grev, N1): # print "formConfirmReadResolve" resolvedList = [] confirmingReadList = [] brLFlankList = [] brRFlankList = [] ### Find possible candidate reads print "inList , outList formConfirmReadResolve()", inList, outList for eachin in inList: for eachout in outList: pathList = abunGraphLib.findAllPathK(eachin, eachout, G, 3) for path in pathList: if len(path) == 3 and path[1] >= N1: R = path[1] confirmingReadList.append(R) brLFlankList.append([eachin, R]) brRFlankList.append([eachout, R]) ### Filter simple false cases toUseReadDic = {} confirmingReadList.sort() for key, items in groupby(confirmingReadList): toUseReadDic[str(key)] = True newbrLFlankList = abunHouseKeeper.getDistinct(brLFlankList) newbrLFlankList.sort(key=itemgetter(1)) for key, items in groupby(newbrLFlankList, itemgetter(1)): mylist = list(items) if len(mylist) > 1: toUseReadDic[str(key)] = False newbrRFlankList = abunHouseKeeper.getDistinct(brRFlankList) newbrRFlankList.sort(key=itemgetter(1)) for key, items in groupby(newbrRFlankList, itemgetter(1)): mylist = list(items) if len(mylist) > 1: toUseReadDic[str(key)] = False finalSearchReadList = [] for eachitem in toUseReadDic: if toUseReadDic[eachitem] == True: finalSearchReadList.append(int(eachitem)) ### Check paths to confirm all false cases for eachR in finalSearchReadList: l1 = abunGraphLib.findAllReachable(eachR, N1, G) l2 = abunGraphLib.findAllReachable(eachR, N1, Grev) l1Distinct = abunHouseKeeper.getDistinct(l1) l2Distinct = abunHouseKeeper.getDistinct(l2) if len(l1Distinct) == 1 and len(l2Distinct) == 1: c1, c2 = l1Distinct[0], l2Distinct[0] resolvedList.append([c2, c1]) return resolvedList
def BResolution(Gnew, folderName, contigReadGraph, N1, myCountDic, lenDic, mummerLink): if abunHouseKeeper.abunGlobalSplitParameterRobot.runBResolve: print "abunHouseKeeper.abunGlobalSplitParameterRobot.runBResolve", abunHouseKeeper.abunGlobalSplitParameterRobot.runBResolve maxRThres = abunHouseKeeper.abunGlobalSplitParameterRobot.RThres repeatFinder.adjListToRepeatList(Gnew.adj, folderName, "phaseRepeatTR.txt") json_data = open(folderName + "phaseRepeatTR.txt", 'r') repeatPairs = json.load(json_data) repeatPairs = obtainNonEmpty(repeatPairs) biResolvedCombineList = [] G = abunGraphLib.seqGraphWt(0) G.loadFromFile(folderName, contigReadGraph) Grev = abunGraphLib.formReverseGraphFast(G) abunAnalysisList = [] for eachitem in repeatPairs: inList, outList = eachitem[0], eachitem[1] if not abunHouseKeeper.abunGlobalRunEM: resolvedList, brResolvedList = [], [] if abunHouseKeeper.abunGlobalSplitParameterRobot.toRunAbunB: if abunHouseKeeper.abunGlobalSplitParameterRobot.AbunLowerB > 0: abunHouseKeeper.abunGlobalSplitParameterRobot.AbunLower = abunHouseKeeper.abunGlobalSplitParameterRobot.AbunLowerB if abunHouseKeeper.abunGlobalSplitParameterRobot.AbunUpperB > 0: abunHouseKeeper.abunGlobalSplitParameterRobot.AbunUpper = abunHouseKeeper.abunGlobalSplitParameterRobot.AbunUpperB if not abunHouseKeeper.abunGlobalSplitParameterRobot.toRunAggB: resolvedList = determindMatch(inList, outList, myCountDic, folderName, contigReadGraph, N1) else: resolvedList = determindMatchAggregate( inList, outList, myCountDic, folderName, contigReadGraph, N1, Gnew, lenDic) if abunHouseKeeper.abunGlobalSplitParameterRobot.toRunBRB: if abunHouseKeeper.abunGlobalSplitParameterRobot.BRThresB > 0: abunHouseKeeper.abunGlobalSplitParameterRobot.BRThres = abunHouseKeeper.abunGlobalSplitParameterRobot.BRThresB brResolvedList = formBRReolve(folderName, inList, outList, G, Grev, True, N1) combinedList = abunHouseKeeper.getDistinct(resolvedList + brResolvedList) print "resolvedList, brResolvedList, inList, 
outList", resolvedList, brResolvedList, inList, outList print "resolveConflict(combinedList)", resolveConflict( combinedList) abunAnalysisList.append([ inList, outList, resolvedList, brResolvedList, resolveConflict(combinedList) ]) if len(inList) <= maxRThres and len( outList) <= maxRThres and len(inList) > 0 and len( outList) > 0: resolvedCombine = resolveConflict(combinedList) Gnew.bipartiteLocalResolve(resolvedCombine, inList, outList, folderName) else: import emalgo resolvedCombine = emalgo.BResolvePreparation( folderName, inList, outList, G, Grev, N1, mummerLink) Gnew.bipartiteLocalResolve(resolvedCombine, inList, outList, folderName) Gnew.condense() with open(folderName + "biResolvedCombineList.json", 'w') as f: json.dump(biResolvedCombineList, f) with open(folderName + "abunAnalysisList.json", 'w') as f: json.dump(abunAnalysisList, f) #assert(1==2) return Gnew else: return Gnew