# NOTE(review): a second definition of formResolveListForMatch follows later in
# this file and replaces this one when the module is imported.
def formResolveListForMatch(folderName, newInList, newOutList, sd, contigReadGraph, N1):
    """Pair incoming items with matched outgoing nodes that are joined by a path.

    For every item of ``newInList`` the sibling helper ``satisfyMatch`` is asked
    for a partner in ``newOutList``; a pair is kept only when
    ``abunGraphLib.findPathBtwEnds`` returns something other than ``None`` for it.

    Args:
        folderName: forwarded unchanged to ``abunGraphLib.findPathBtwEnds``.
        newInList: iterable of indexable items; ``item[0]`` is used as the left
            contig index.
        newOutList: candidate partners, forwarded to ``satisfyMatch``.
        sd: tolerance value forwarded to ``satisfyMatch`` (presumably a standard
            deviation -- confirm against ``satisfyMatch``).
        contigReadGraph: forwarded unchanged to ``abunGraphLib.findPathBtwEnds``.
        N1: forwarded unchanged to ``abunGraphLib.findPathBtwEnds``.

    Returns:
        A list of ``[leftCtgIndex, rightCtgIndex]`` pairs, in ``newInList`` order.
    """
    resolvedList = []
    for eachitem in newInList:
        found = satisfyMatch(eachitem, newOutList, sd)
        if found == -1:
            # -1 is satisfyMatch's "no partner" sentinel.
            continue

        leftCtgIndex, rightCtgIndex = eachitem[0], found
        succReadsList = abunGraphLib.findPathBtwEnds(
            folderName, leftCtgIndex, rightCtgIndex, contigReadGraph, N1)

        # Only the existence of a path matters here; its content is not stored.
        if succReadsList is not None:
            resolvedList.append([leftCtgIndex, rightCtgIndex])

    return resolvedList
# NOTE(review): this re-defines formResolveListForMatch, which already appears
# earlier in this file with the same body; this later definition is the live one.
def formResolveListForMatch(folderName, newInList, newOutList, sd, contigReadGraph, N1):
    """Return the in/out contig pairs that match and are connected by a path.

    Each item of ``newInList`` is matched against ``newOutList`` through the
    sibling helper ``satisfyMatch``. A matched pair is reported only if
    ``abunGraphLib.findPathBtwEnds`` yields a non-``None`` result for it.

    Args:
        folderName: forwarded unchanged to ``abunGraphLib.findPathBtwEnds``.
        newInList: iterable of indexable items; ``item[0]`` is the left contig
            index of a candidate pair.
        newOutList: candidate partners, forwarded to ``satisfyMatch``.
        sd: tolerance value forwarded to ``satisfyMatch`` (presumably a standard
            deviation -- confirm against ``satisfyMatch``).
        contigReadGraph: forwarded unchanged to ``abunGraphLib.findPathBtwEnds``.
        N1: forwarded unchanged to ``abunGraphLib.findPathBtwEnds``.

    Returns:
        A list of ``[leftCtgIndex, rightCtgIndex]`` pairs, in ``newInList`` order.
    """
    resolvedList = []
    for eachitem in newInList:
        found = satisfyMatch(eachitem, newOutList, sd)
        if found == -1:
            # satisfyMatch signals "nothing matched" with -1.
            continue

        leftCtgIndex, rightCtgIndex = eachitem[0], found
        succReadsList = abunGraphLib.findPathBtwEnds(
            folderName, leftCtgIndex, rightCtgIndex, contigReadGraph, N1)

        # The path itself is discarded; a non-None result is the acceptance test.
        if succReadsList is not None:
            resolvedList.append([leftCtgIndex, rightCtgIndex])

    return resolvedList
def xNodeResolving(folderName, contigReadGraph): ### Init G, myCountDic, N1 G = graphLib.seqGraph(0) G.loadFromFile(folderName, contigReadGraph) with open(folderName + "myCountDic.json") as f: myCountDic = json.load(f) N1 = len(myCountDic) * 2 ### Add resolved edge adj = [[] for i in range(N1)] for i in range(N1): adj[i] = abunGraphLib.findAllReachable(i, N1, G) Gnew = graphLib.seqGraph(N1) for i in range(N1): for j in adj[i]: Gnew.insertEdge(i, j, 1) extraCounter = 0 mapDummyToRealDic = {} resolvedList = [] for v in Gnew.graphNodesList: inList = [] for eachitem in v.listOfPrevNodes: inList.append(eachitem[0]) outList = [] for eachitem in v.listOfNextNodes: outList.append(eachitem[0]) inListCt = getCtTwoToOne(inList, myCountDic) outListCt = getCtTwoToOne(outList, myCountDic) sizeList = [] for eachitem in myCountDic: sizeList.append(myCountDic[eachitem]) sd = np.std(sizeList) for eachIn in inListCt: matchedOut = satisfyMatch(eachIn, outListCt, sd) if matchedOut != -1: leftCtgIndex, rightCtgIndex = eachIn[0], v.nodeIndex inSuccReadsList = abunGraphLib.findPathBtwEnds( folderName, leftCtgIndex, rightCtgIndex, contigReadGraph, N1 ) leftCtgIndex, rightCtgIndex = v.nodeIndex, matchedOut outSuccReadsList = abunGraphLib.findPathBtwEnds( folderName, leftCtgIndex, rightCtgIndex, contigReadGraph, N1 ) if inSuccReadsList != None and outSuccReadsList != None: resolvedList.append([eachIn[0]] + inSuccReadsList + [N1 + extraCounter]) print "in: ", resolvedList[-1] resolvedList.append([N1 + extraCounter] + outSuccReadsList + [matchedOut]) print "out: ", resolvedList[-1] mapDummyToRealDic[extraCounter] = v.nodeIndex extraCounter = extraCounter + 1 return resolvedList, mapDummyToRealDic
def singleGapLookUp(eachmatchpair, folderName, N1, mummerLink, contigReadGraph, contigFilename, readsetFilename): print eachmatchpair leftCtgIndex, rightCtgIndex, leftEnd, rightStart, middleContent = eachmatchpair[0], eachmatchpair[-1], 0, 0, "" succReadsList = abunGraphLib.findPathBtwEnds(folderName, leftCtgIndex, rightCtgIndex, contigReadGraph, N1) succReadsList = [] G = graphLib.seqGraph(0) G.loadFromFile(folderName, contigReadGraph) allPaths = abunGraphLib.findAllPathK(leftCtgIndex, rightCtgIndex, G, 5) # shuffle(allPaths) print "allPaths", allPaths possibleList = [] for p in allPaths: noContig = True for pp in p[1:-1]: if pp < N1: noContig = False if noContig == True: possibleList.append(p) print "possibleList", possibleList minListLen = 1000 for p in possibleList: if len(p) < minListLen: succReadsList = p minListLen = len(p) if len(succReadsList) > 0: succReadsList.pop(0) succReadsList.pop(-1) else: print "interesting item for future study" print "succReadsList", succReadsList if len(succReadsList) == 0: contigName = abunHouseKeeper.parseIDToName(leftCtgIndex, "C", N1) leftSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName) contigName = abunHouseKeeper.parseIDToName(rightCtgIndex, "C", N1) rightSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName) overlap = IORobot.alignWithName( leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex) ) print "overlap contig : ", overlap leftEnd = len(leftSeg) - overlap[0] middleContent = "" else: contigName = abunHouseKeeper.parseIDToName(leftCtgIndex, "C", N1) print contigName leftSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName) readName = abunHouseKeeper.parseIDToName(succReadsList[0], "R", N1) print readName rightSeg = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName) overlap = IORobot.alignWithName( leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex) ) 
print "overlap start read : ", overlap leftEnd = len(leftSeg) - overlap[0] middleContent = "" for i in range(len(succReadsList) - 1): readName = abunHouseKeeper.parseIDToName(succReadsList[i], "R", N1) leftSeg = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName) readName = abunHouseKeeper.parseIDToName(succReadsList[i + 1], "R", N1) rightSeg = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName) overlap = IORobot.alignWithName( leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex) ) print "overlap middle read : ", overlap middleContent = middleContent + leftSeg[0 : len(leftSeg) - overlap[0]] readName = abunHouseKeeper.parseIDToName(succReadsList[-1], "R", N1) leftSeg = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName) contigName = abunHouseKeeper.parseIDToName(rightCtgIndex, "C", N1) rightSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName) overlap = IORobot.alignWithName( leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex) ) print "overlap end read : ", overlap middleContent = middleContent + leftSeg[0 : len(leftSeg) - overlap[0]] return [leftCtgIndex, rightCtgIndex, leftEnd, rightStart, middleContent]
def xNodeResolving(folderName, contigReadGraph): ''' Input : contigGraph , abunInfo , folderName Output: myresolvedList.json, gapContentLookUp.json, dummyNodeMapping.json Algorithm : 1) Tranverse the graph a) If the node can well be fixed with sd requirement met i) Link it across and add the pair into the myresolvedList, gapContentLookUp ii) Add dummynodes and fill in the dummyNodeMapping 2) Format return and output as temp file ''' ### Init G, myCountDic, N1 G = graphLib.seqGraph(0) G.loadFromFile(folderName, contigReadGraph) with open(folderName + 'myCountDic.json') as f: myCountDic = json.load(f) N1 = len(myCountDic) * 2 ### Add resolved edge adj = [[] for i in range(N1)] for i in range(N1): adj[i] = abunGraphLib.findAllReachable(i, N1, G) Gnew = graphLib.seqGraph(N1) for i in range(N1): for j in adj[i]: Gnew.insertEdge(i, j, 1) extraCounter = 0 mapDummyToRealDic = {} resolvedList = [] for v in Gnew.graphNodesList: inList = [] for eachitem in v.listOfPrevNodes: inList.append(eachitem[0]) outList = [] for eachitem in v.listOfNextNodes: outList.append(eachitem[0]) inListCt = getCtTwoToOne(inList, myCountDic) outListCt = getCtTwoToOne(outList, myCountDic) sizeList = [] for eachitem in myCountDic: sizeList.append(myCountDic[eachitem]) sd = np.std(sizeList) for eachIn in inListCt: matchedOut = satisfyMatch(eachIn, outListCt, sd) if matchedOut != -1: leftCtgIndex, rightCtgIndex = eachIn[0], v.nodeIndex inSuccReadsList = abunGraphLib.findPathBtwEnds( folderName, leftCtgIndex, rightCtgIndex, contigReadGraph, N1) leftCtgIndex, rightCtgIndex = v.nodeIndex, matchedOut outSuccReadsList = abunGraphLib.findPathBtwEnds( folderName, leftCtgIndex, rightCtgIndex, contigReadGraph, N1) if inSuccReadsList != None and outSuccReadsList != None: resolvedList.append([eachIn[0]] + inSuccReadsList + [N1 + extraCounter]) print "in: ", resolvedList[-1] resolvedList.append([N1 + extraCounter] + outSuccReadsList + [matchedOut]) print "out: ", resolvedList[-1] mapDummyToRealDic[extraCounter] = 
v.nodeIndex extraCounter = extraCounter + 1 return resolvedList, mapDummyToRealDic