def createIndelRobot(folderName):
    """Create a ``common.parameterRobot`` configured for indel processing.

    The robot's ``defaultFolder`` is set to ``folderName``, the threshold
    parameters below are installed, ``tunePara()`` is run and ``snprate`` is
    fixed at 0.01.  ``setReadStat`` / ``setGenomeStat`` are not called here.

    Returns the configured robot.
    """
    thresholdSettings = {
        "liid": 30,
        "thresForRandom": 0.5,
        "thresForins": 0.4,
        "thresFordel": 0.4,
        "insMin": 4,
        "delMin": 4,
        "thresholdForSupport": 0.15,
        "subthreshold": 9,
        "editsub": -10,
        "editins": -1,
        "editdel": -1,
        "editmatch": 1,
        "lookRange": 15,
    }

    robot = common.parameterRobot()
    robot.defaultFolder = folderName
    robot.setThresholdPara(**thresholdSettings)
    robot.tunePara()
    robot.snprate = 0.01
    return robot
def arrangeSeqBasedOnRefEasy(motherGenome, reconstructedGenome, parameterRobot):
    """Build candidate circular rotations of ``motherGenome``.

    The first ``W`` symbols of ``reconstructedGenome`` are aligned (via
    ``cleaner.SWAlignment``) against consecutive, non-overlapping ``W``-long
    windows of ``motherGenome``.  For each of the best-scoring windows (at
    most three), the mother genome is rotated by the offset implied by the
    window position and the alignment start coordinates.

    Returns a list of ``np.int32`` arrays of length ``len(motherGenome)``,
    ordered from best to worst alignment score.  The list has fewer than
    three entries (possibly none) when fewer than three windows were scored;
    previously that case raised ``IndexError``.

    NOTE(review): a function with this name is defined more than once in
    this file; at import time the last definition wins.
    """
    W = 50
    G = len(motherGenome)

    # NOTE(review): the ``parameterRobot`` argument is replaced by a fresh
    # default robot, so the caller's settings are ignored.  Kept as-is to
    # preserve current results -- confirm whether this is intended.
    parameterRobot = common.parameterRobot()

    runningI = 0
    runningJ = 0  # the reconstructed-genome window never moves
    counter = 0
    scoreList = []
    while runningI < G - 1 and counter < parameterRobot.G * 1.1:
        score, _, _, starti, startj, endi, endj = cleaner.SWAlignment(
            motherGenome[runningI:runningI + W],
            reconstructedGenome[runningJ:runningJ + W],
            parameterRobot)
        scoreList.append([score, starti, startj, endi, endj, runningI])
        runningI += W
        counter += 1

    # Best score first; entries are unique because each window start is.
    scoreList.sort(reverse=True)

    testCases = []
    for score, starti, startj, endi, endj, windowStart in scoreList[:3]:
        myindex = windowStart + starti - startj - runningJ
        # Rotating left by ``myindex`` (modulo G) puts motherGenome[myindex]
        # at position 0; a negative offset wraps around from the end.
        rotated = np.roll(np.asarray(motherGenome), -myindex)
        testCases.append(rotated.astype(np.int32))

    return testCases
def findMismatchNumber(motherGenome, reconstructedGenome):
    """Estimate the number of mismatches between two genomes.

    Both sequences are walked with ``W``-long windows.  Each window pair is
    aligned with ``cleaner.SWAlignmentFixRef`` and both cursors advance by
    the end offsets that call returns.  The walk stops when the mother
    cursor reaches the last position or stops making progress.

    Prints two separator lines and, when at least one window was aligned,
    the average score per window.

    Returns ``max((G - totalScore) / 2, 0)`` where ``G`` is
    ``len(motherGenome)`` and ``totalScore`` is the sum of window scores.

    NOTE(review): a function with this name is defined more than once in
    this file; at import time the last definition wins.
    """
    # Original author's notes:
    # 1) Sliding window matching and count
    # 2) Make sure to not include the loop around thing.
    W = 50
    G = len(motherGenome)
    parameterRobot = common.parameterRobot()

    runningI = 0
    runningJ = 0
    previousI = -1  # detects a window that made no progress
    totalScore = 0
    counter = 0

    print("----------------------")
    while runningI < G - 1 and previousI != runningI:
        score, _, _, starti, startj, endi, endj = cleaner.SWAlignmentFixRef(
            motherGenome[runningI:runningI + W],
            reconstructedGenome[runningJ:runningJ + W],
            parameterRobot)
        previousI = runningI
        runningI = endi + runningI
        runningJ = endj + runningJ
        counter += 1
        totalScore += score
    print("----------------------")

    numberOfMismatch = max((G - totalScore) / 2, 0)

    # With an empty or single-symbol mother genome no window is aligned;
    # skip the average instead of dividing by zero.
    if counter > 0:
        print(totalScore / counter)

    return numberOfMismatch
def findMismatchNumber(motherGenome, reconstructedGenome):
    """Estimate how many positions differ between two genomes.

    The genomes are compared window by window (``W`` symbols at a time)
    using ``cleaner.SWAlignmentFixRef``; after each alignment the mother and
    reconstructed cursors move forward by the returned ``endi`` / ``endj``.
    Iteration ends when the mother cursor reaches the final position or an
    alignment leaves it where it was.

    Side effects: prints a separator before and after the scan, then the
    mean window score (only if at least one window was aligned).

    Returns ``max((G - totalScore) / 2, 0)`` with ``G = len(motherGenome)``
    and ``totalScore`` the sum of all window scores.

    NOTE(review): this name is defined more than once in this file; the
    last definition is the one that takes effect.
    """
    # Original author's notes:
    # 1) Sliding window matching and count
    # 2) Make sure to not include the loop around thing.
    W = 50
    G = len(motherGenome)
    parameterRobot = common.parameterRobot()

    runningI, runningJ = 0, 0
    lastRunningI = -1  # previous mother cursor, used to detect a stall
    totalScore = 0
    counter = 0

    print("----------------------")
    while runningI < G - 1 and lastRunningI != runningI:
        alignment = cleaner.SWAlignmentFixRef(
            motherGenome[runningI:runningI + W],
            reconstructedGenome[runningJ:runningJ + W],
            parameterRobot)
        score, _, _, starti, startj, endi, endj = alignment

        lastRunningI = runningI
        runningI = endi + runningI
        runningJ = endj + runningJ
        counter += 1
        totalScore += score
    print("----------------------")

    numberOfMismatch = max((G - totalScore) / 2, 0)

    # No window is aligned when the mother genome has fewer than two
    # symbols; avoid the division by zero that case used to trigger.
    if counter > 0:
        print(totalScore / counter)

    return numberOfMismatch
def arrangeSeqBasedOnRefEasy(motherGenome, reconstructedGenome, parameterRobot):
    """Return rotated copies of ``motherGenome`` as alignment candidates.

    ``reconstructedGenome[0:W]`` is aligned with ``cleaner.SWAlignment``
    against successive ``W``-long, non-overlapping windows of
    ``motherGenome``.  The highest-scoring windows (up to three) each yield
    one candidate: the mother genome circularly shifted by the offset
    derived from the window start and the alignment start coordinates.

    Returns a list of ``np.int32`` arrays, each ``len(motherGenome)`` long,
    best score first.  When fewer than three windows were scored the list
    is correspondingly shorter (this used to raise ``IndexError``).

    NOTE(review): this name is defined more than once in this file; the
    last definition is the one that takes effect.
    """
    W = 50
    G = len(motherGenome)

    # NOTE(review): the incoming ``parameterRobot`` is discarded in favour
    # of a default-constructed one.  Behaviour kept unchanged; confirm
    # whether the caller's robot should be used instead.
    parameterRobot = common.parameterRobot()

    runningI = 0
    runningJ = 0  # stays at 0: only the head of the reconstruction is used
    counter = 0
    scoreList = []
    while runningI < G - 1 and counter < parameterRobot.G * 1.1:
        alignment = cleaner.SWAlignment(
            motherGenome[runningI:runningI + W],
            reconstructedGenome[runningJ:runningJ + W],
            parameterRobot)
        score, _, _, starti, startj, endi, endj = alignment
        scoreList.append([score, starti, startj, endi, endj, runningI])
        runningI = runningI + W
        counter = counter + 1

    # Descending order; no two entries compare equal because the window
    # start stored in each entry is unique.
    scoreList.sort(reverse=True)

    motherArray = np.asarray(motherGenome)
    testCases = []
    for score, starti, startj, endi, endj, windowStart in scoreList[:3]:
        myindex = windowStart + starti - startj - runningJ
        # A left rotation by ``myindex`` (taken modulo G by np.roll) covers
        # both the positive and the wrap-around (non-positive) offsets.
        testCases.append(np.roll(motherArray, -myindex).astype(np.int32))

    return testCases
def indelMSABridging(f2, currentNode, noisyReads, p, snpRate, flankinglen, parameterRobot):
    """Try to pair the two incoming and two outgoing edges of ``currentNode``.

    A dedicated robot is configured from ``parameterRobot`` and ``snpRate``,
    the relevant reads are collected and cleaned, and
    ``extender.readExtender`` decides the pairing:

    * result ``0``  -> prev[0] with next[0], prev[1] with next[1]
    * result ``1``  -> prev[0] with next[1], prev[1] with next[0]
    * anything else -> not resolved

    Prints the resulting pair list and returns ``(canResolve,
    kmerPairsList)``, where each pair is ``[inKmerIndex, outKmerIndex]``.
    Returns ``(False, [])`` without printing when ``toProcessList[4]`` is
    empty.  The ``p`` argument is not used by this function.
    """
    # Need to loop over all the xnodes
    robot = common.parameterRobot()
    robot.defaultFolder = parameterRobot.defaultFolder
    robot.setReadStat(
        Nshort=parameterRobot.N,
        Nlong=parameterRobot.N,
        Lshort=parameterRobot.L,
        Llong=parameterRobot.L,
        p=parameterRobot.p,
        longOnly=True)
    robot.setGenomeStat(G=parameterRobot.G, lrep=500, lsnp=200, lint=50)
    robot.setThresholdPara(
        liid=30, thresForRandom=0.5, thresForins=0.4, thresFordel=0.4,
        insMin=4, delMin=4, thresholdForSupport=0.15, subthreshold=9,
        editsub=-10, editins=-1, editdel=-1, editmatch=1, lookRange=15)
    # tunePara() is deliberately not run here (it was commented out).
    robot.snprate = snpRate

    # toProcessList : in1IndexList, in2IndexList, out1IndexList, out2IndexList, commonIndexList
    # shortToLongMap : indexlong indexshort jstart jend istart iend
    toProcessList = formToProcessList(f2, noisyReads, currentNode, robot, flankinglen)
    if len(toProcessList[4]) == 0:
        return False, []

    shortToLongMap = formRelatedMap(f2, noisyReads, currentNode, robot, toProcessList)

    # First pass ("init"): return value is not used.  Second pass ("vote")
    # yields the read groups handed to the extender.
    cleaner.cleaning([noisyReads, noisyReads], shortToLongMap, toProcessList, robot, "init")
    voted = cleaner.cleaning([noisyReads, noisyReads], shortToLongMap, toProcessList, robot, "vote")
    in1List, in2List, out1List, out2List, commonList, longReadToUse = voted

    extendResult = extender.readExtender(
        in1List, in2List, out1List, out2List, commonList, robot, longReadToUse, True)

    # (prev slot, next slot) combinations selected by the extender's verdict.
    if extendResult == 0:
        slotPairs = ((0, 0), (1, 1))
    elif extendResult == 1:
        slotPairs = ((0, 1), (1, 0))
    else:
        slotPairs = ()

    canResolve = len(slotPairs) > 0
    kmerPairsList = []
    for prevSlot, nextSlot in slotPairs:
        prevEntry = currentNode.listOfPrevNodes[prevSlot]
        inKmerIndex = prevEntry[0].nodeIndexList[-(prevEntry[1] + 1)]

        nextEntry = currentNode.listOfNextNodes[nextSlot]
        outKmerIndex = nextEntry[0].nodeIndexList[nextEntry[1]]

        kmerPairsList.append([inKmerIndex, outKmerIndex])

    print(kmerPairsList)
    return canResolve, kmerPairsList
def indelMSABridging(f2, currentNode, noisyReads, p, snpRate, flankinglen, parameterRobot):
    """Decide how the two in-edges and two out-edges of ``currentNode`` pair up.

    Steps: configure a fresh robot from ``parameterRobot`` / ``snpRate``,
    build the to-process list and the short-to-long map, run
    ``cleaner.cleaning`` in "init" then "vote" mode, and pass the voted
    lists to ``extender.readExtender``.

    The extender result selects the pairing: ``0`` keeps prev[i] with
    next[i]; ``1`` crosses them (prev[0] with next[1], prev[1] with
    next[0]); any other value leaves the node unresolved.

    The pair list is printed before returning.  Returns ``(canResolve,
    kmerPairsList)`` with entries of the form ``[inKmerIndex,
    outKmerIndex]``; returns ``(False, [])`` early, with no print, when
    ``toProcessList[4]`` is empty.  ``p`` is accepted but not used.
    """
    # Need to loop over all the xnodes
    thresholdSettings = dict(
        liid=30, thresForRandom=0.5, thresForins=0.4, thresFordel=0.4,
        insMin=4, delMin=4, thresholdForSupport=0.15, subthreshold=9,
        editsub=-10, editins=-1, editdel=-1, editmatch=1, lookRange=15)

    indelRobot = common.parameterRobot()
    indelRobot.defaultFolder = parameterRobot.defaultFolder
    indelRobot.setReadStat(Nshort=parameterRobot.N, Nlong=parameterRobot.N,
                           Lshort=parameterRobot.L, Llong=parameterRobot.L,
                           p=parameterRobot.p, longOnly=True)
    indelRobot.setGenomeStat(G=parameterRobot.G, lrep=500, lsnp=200, lint=50)
    indelRobot.setThresholdPara(**thresholdSettings)
    # tunePara() is not called here (the call was commented out).
    indelRobot.snprate = snpRate

    # toProcessList : in1IndexList, in2IndexList, out1IndexList, out2IndexList, commonIndexList
    toProcessList = formToProcessList(f2, noisyReads, currentNode, indelRobot, flankinglen)
    if len(toProcessList[4]) == 0:
        return False, []

    # shortToLongMap : indexlong indexshort jstart jend istart iend
    shortToLongMap = formRelatedMap(f2, noisyReads, currentNode, indelRobot, toProcessList)

    # The "init" pass is run only for its effects; "vote" returns the lists.
    cleaner.cleaning([noisyReads, noisyReads], shortToLongMap, toProcessList,
                     indelRobot, "init")
    in1List, in2List, out1List, out2List, commonList, longReadToUse = cleaner.cleaning(
        [noisyReads, noisyReads], shortToLongMap, toProcessList, indelRobot, "vote")

    extendResult = extender.readExtender(in1List, in2List, out1List, out2List,
                                         commonList, indelRobot, longReadToUse, True)

    # Which next-edge each prev-edge (0 then 1) is matched with.
    if extendResult == 0:
        nextForPrev = [0, 1]
    elif extendResult == 1:
        nextForPrev = [1, 0]
    else:
        nextForPrev = []

    canResolve = len(nextForPrev) > 0
    kmerPairsList = []
    for prevSlot, nextSlot in enumerate(nextForPrev):
        edgeWt = currentNode.listOfPrevNodes[prevSlot][1]
        inKmerIndex = currentNode.listOfPrevNodes[prevSlot][0].nodeIndexList[-(edgeWt + 1)]

        edgeWt = currentNode.listOfNextNodes[nextSlot][1]
        outKmerIndex = currentNode.listOfNextNodes[nextSlot][0].nodeIndexList[edgeWt]

        kmerPairsList.append([inKmerIndex, outKmerIndex])

    print(kmerPairsList)
    return canResolve, kmerPairsList