示例#1
0
def createIndelRobot(folderName):
    """Build a parameter robot preconfigured for indel processing.

    A fresh ``common.parameterRobot`` is created, its ``defaultFolder`` is
    set to ``folderName``, the fixed threshold / edit-score settings below
    are installed, ``tunePara()`` is run and ``snprate`` is set to 0.01.

    Returns:
        The configured parameter robot.
    """
    # Fixed threshold and edit-score settings handed to setThresholdPara.
    thresholdSettings = dict(
        liid=30,
        thresForRandom=0.5,
        thresForins=0.4,
        thresFordel=0.4,
        insMin=4,
        delMin=4,
        thresholdForSupport=0.15,
        subthreshold=9,
        editsub=-10,
        editins=-1,
        editdel=-1,
        editmatch=1,
        lookRange=15,
    )

    robot = common.parameterRobot()
    robot.defaultFolder = folderName
    # setReadStat / setGenomeStat are not called here; the calls that copied
    # N, L, p and G from another robot were commented out in this function.
    robot.setThresholdPara(**thresholdSettings)
    robot.tunePara()
    robot.snprate = 0.01
    return robot
示例#2
0
def arrangeSeqBasedOnRefEasy(motherGenome, reconstructedGenome,
                             parameterRobot):
    """Propose rotated copies of ``motherGenome`` anchored on the reconstruction.

    ``motherGenome`` is cut into consecutive windows of ``W`` symbols and
    every window is aligned with ``cleaner.SWAlignment`` against the first
    ``W`` symbols of ``reconstructedGenome``.  For each of the best-scoring
    windows (at most three) an int32 copy of ``motherGenome`` is built,
    circularly shifted by ``runningI + starti - startj`` positions.

    Args:
        motherGenome: reference sequence; it is sliced, indexed and measured
            with ``len``.
        reconstructedGenome: sequence whose leading window every mother
            window is aligned against.
        parameterRobot: kept for interface compatibility; see the review
            note in the body.

    Returns:
        A list of numpy int32 arrays of length ``len(motherGenome)``, best
        scoring window first.  It holds three arrays, or fewer when fewer
        than three windows were scored.
    """
    W = 50  # window length used for every alignment
    maxCandidates = 3
    G = len(motherGenome)
    runningJ = 0  # the reconstruction is always read from its start

    # NOTE(review): the caller's parameterRobot is replaced by a fresh one,
    # exactly as the original code did; confirm whether the argument was
    # meant to be used instead before changing this.
    parameterRobot = common.parameterRobot()

    scoreList = []
    runningI = 0
    counter = 0
    while runningI < G - 1 and counter < parameterRobot.G * 1.1:
        (score, returnalignedSeq1, returnalignedSeq2,
         starti, startj, endi, endj) = cleaner.SWAlignment(
            motherGenome[runningI:runningI + W],
            reconstructedGenome[runningJ:runningJ + W], parameterRobot)
        scoreList.append([score, starti, startj, endi, endj, runningI])

        runningI = runningI + W
        counter = counter + 1

    # Ascending sort on [score, starti, ...]; the best windows sit at the end.
    scoreList = sorted(scoreList)

    testCases = []
    # Clamp to the number of scored windows so short genomes do not raise
    # IndexError when fewer than three windows exist.
    for rank in range(min(maxCandidates, len(scoreList))):
        score, starti, startj, endi, endj, runningI = scoreList[-(rank + 1)]

        myindex = runningI + starti - startj - runningJ
        # Non-positive offsets wrap around the end of the genome.
        shift = myindex if myindex > 0 else G + myindex

        newGenome = np.zeros(G, dtype=np.int32)
        newGenome[0:G - shift] = motherGenome[shift:G]
        newGenome[G - shift:G] = motherGenome[0:shift]

        testCases.append(newGenome)

    return testCases
示例#3
0
def findMismatchNumber(motherGenome, reconstructedGenome):
    """Estimate the number of mismatches between two genomes.

    Both sequences are walked in windows of ``W`` symbols.  Each pair of
    windows is aligned with ``cleaner.SWAlignmentFixRef`` and both cursors
    advance by the end offsets the aligner reports.  The walk stops at the
    end of ``motherGenome`` or as soon as the mother cursor stops moving.
    The window scores are summed and the estimate is
    ``max((G - totalScore) / 2, 0)`` with ``G = len(motherGenome)``.

    Side effects:
        Prints two separator lines and, when at least one window was
        aligned, the mean window score.

    Args:
        motherGenome: reference sequence.
        reconstructedGenome: sequence compared against the reference.

    Returns:
        The mismatch estimate (never negative).
    """
    W = 50  # window length handed to the aligner
    G = len(motherGenome)
    parameterRobot = common.parameterRobot()

    runningI = 0
    runningJ = 0
    previousI = -1  # mother cursor of the previous round, to detect a stall
    windowCount = 0
    totalScore = 0

    print("----------------------")
    while runningI < G - 1 and previousI != runningI:
        (score, returnalignedSeq1, returnalignedSeq2,
         starti, startj, endi, endj) = cleaner.SWAlignmentFixRef(
            motherGenome[runningI:runningI + W],
            reconstructedGenome[runningJ:runningJ + W], parameterRobot)

        previousI = runningI
        runningI = endi + runningI
        runningJ = endj + runningJ

        windowCount = windowCount + 1
        totalScore = totalScore + score
    print("----------------------")

    numberOfMismatch = max((G - totalScore) / 2, 0)
    # The loop never runs for genomes of length <= 1; skip the average then
    # instead of dividing by zero.
    if windowCount > 0:
        print(totalScore / windowCount)

    return numberOfMismatch
示例#4
0
def findMismatchNumber(motherGenome, reconstructedGenome):
    """Estimate the number of mismatches between two genomes.

    Both sequences are walked in windows of ``W`` symbols.  Each pair of
    windows is aligned with ``cleaner.SWAlignmentFixRef`` and both cursors
    advance by the end offsets the aligner reports.  The walk stops at the
    end of ``motherGenome`` or as soon as the mother cursor stops moving.
    The window scores are summed and the estimate is
    ``max((G - totalScore) / 2, 0)`` with ``G = len(motherGenome)``.

    Side effects:
        Prints two separator lines and, when at least one window was
        aligned, the mean window score.

    Args:
        motherGenome: reference sequence.
        reconstructedGenome: sequence compared against the reference.

    Returns:
        The mismatch estimate (never negative).
    """
    W = 50  # window length handed to the aligner
    G = len(motherGenome)
    parameterRobot = common.parameterRobot()

    runningI = 0
    runningJ = 0
    previousI = -1  # mother cursor of the previous round, to detect a stall
    windowCount = 0
    totalScore = 0

    print("----------------------")
    while runningI < G - 1 and previousI != runningI:
        (score, returnalignedSeq1, returnalignedSeq2,
         starti, startj, endi, endj) = cleaner.SWAlignmentFixRef(
            motherGenome[runningI:runningI + W],
            reconstructedGenome[runningJ:runningJ + W], parameterRobot)

        previousI = runningI
        runningI = endi + runningI
        runningJ = endj + runningJ

        windowCount = windowCount + 1
        totalScore = totalScore + score
    print("----------------------")

    numberOfMismatch = max((G - totalScore) / 2, 0)
    # The loop never runs for genomes of length <= 1; skip the average then
    # instead of dividing by zero.
    if windowCount > 0:
        print(totalScore / windowCount)

    return numberOfMismatch
示例#5
0
def arrangeSeqBasedOnRefEasy(motherGenome, reconstructedGenome, parameterRobot):
    """Propose rotated copies of ``motherGenome`` anchored on the reconstruction.

    ``motherGenome`` is cut into consecutive windows of ``W`` symbols and
    every window is aligned with ``cleaner.SWAlignment`` against the first
    ``W`` symbols of ``reconstructedGenome``.  For each of the best-scoring
    windows (at most three) an int32 copy of ``motherGenome`` is built,
    circularly shifted by ``runningI + starti - startj`` positions.

    Args:
        motherGenome: reference sequence; it is sliced, indexed and measured
            with ``len``.
        reconstructedGenome: sequence whose leading window every mother
            window is aligned against.
        parameterRobot: kept for interface compatibility; see the review
            note in the body.

    Returns:
        A list of numpy int32 arrays of length ``len(motherGenome)``, best
        scoring window first.  It holds three arrays, or fewer when fewer
        than three windows were scored.
    """
    W = 50  # window length used for every alignment
    maxCandidates = 3
    G = len(motherGenome)
    runningJ = 0  # the reconstruction is always read from its start

    # NOTE(review): the caller's parameterRobot is replaced by a fresh one,
    # exactly as the original code did; confirm whether the argument was
    # meant to be used instead before changing this.
    parameterRobot = common.parameterRobot()

    scoreList = []
    runningI = 0
    counter = 0
    while runningI < G - 1 and counter < parameterRobot.G * 1.1:
        (score, returnalignedSeq1, returnalignedSeq2,
         starti, startj, endi, endj) = cleaner.SWAlignment(
            motherGenome[runningI:runningI + W],
            reconstructedGenome[runningJ:runningJ + W], parameterRobot)
        scoreList.append([score, starti, startj, endi, endj, runningI])

        runningI = runningI + W
        counter = counter + 1

    # Ascending sort on [score, starti, ...]; the best windows sit at the end.
    scoreList = sorted(scoreList)

    testCases = []
    # Clamp to the number of scored windows so short genomes do not raise
    # IndexError when fewer than three windows exist.
    for rank in range(min(maxCandidates, len(scoreList))):
        score, starti, startj, endi, endj, runningI = scoreList[-(rank + 1)]

        myindex = runningI + starti - startj - runningJ
        # Non-positive offsets wrap around the end of the genome.
        shift = myindex if myindex > 0 else G + myindex

        newGenome = np.zeros(G, dtype=np.int32)
        newGenome[0:G - shift] = motherGenome[shift:G]
        newGenome[G - shift:G] = motherGenome[0:shift]

        testCases.append(newGenome)

    return testCases
def indelMSABridging(f2, currentNode,noisyReads,p, snpRate,flankinglen, parameterRobot):
    """Try to pair the predecessors and successors of ``currentNode``.

    A private parameter robot is configured (read and genome statistics are
    copied from ``parameterRobot``, thresholds are fixed here, ``snprate`` is
    ``snpRate``).  The reads are then gathered with ``formToProcessList`` /
    ``formRelatedMap``, cleaned with ``cleaner.cleaning`` ("init" pass, then
    "vote" pass) and handed to ``extender.readExtender``.

    The extender result selects the pairing:
        0     -> predecessor 0 with successor 0, predecessor 1 with successor 1
        1     -> predecessor 0 with successor 1, predecessor 1 with successor 0
        other -> unresolved, no pairs

    Each pair is ``[inKmerIndex, outKmerIndex]``: the in index is element
    ``-(edgeWt + 1)`` of the predecessor's ``nodeIndexList`` and the out
    index is element ``edgeWt`` of the successor's ``nodeIndexList``, with
    ``edgeWt`` read from the matching entry of ``listOfPrevNodes`` /
    ``listOfNextNodes``.

    The pair list is printed before returning.  ``p`` is not used.

    Returns:
        ``(canResolve, kmerPairsList)``; ``(False, [])`` is returned early
        when the fifth entry of the to-process list is empty.
    """
    # Parameter set used only for this bridging attempt.
    bridgeRobot = common.parameterRobot()
    bridgeRobot.defaultFolder = parameterRobot.defaultFolder
    bridgeRobot.setReadStat(
        Nshort=parameterRobot.N,
        Nlong=parameterRobot.N,
        Lshort=parameterRobot.L,
        Llong=parameterRobot.L,
        p=parameterRobot.p,
        longOnly=True)
    bridgeRobot.setGenomeStat(G=parameterRobot.G, lrep=500, lsnp=200, lint=50)
    bridgeRobot.setThresholdPara(
        liid=30, thresForRandom=0.5, thresForins=0.4, thresFordel=0.4,
        insMin=4, delMin=4, thresholdForSupport=0.15, subthreshold=9,
        editsub=-10, editins=-1, editdel=-1, editmatch=1, lookRange=15)
    # tunePara() is not called on this robot.
    bridgeRobot.snprate = snpRate

    # toProcessList : in1IndexList, in2IndexList, out1IndexList, out2IndexList, commonIndexList
    # shortToLongMap : indexlong, indexshort, jstart, jend, istart, iend
    toProcessList = formToProcessList(
        f2, noisyReads, currentNode, bridgeRobot, flankinglen)
    if len(toProcessList[4]) == 0:
        return False, []

    shortToLongMap = formRelatedMap(
        f2, noisyReads, currentNode, bridgeRobot, toProcessList)

    # Two passes over the same reads: "init" first, then "vote".
    cleaner.cleaning(
        [noisyReads, noisyReads], shortToLongMap, toProcessList, bridgeRobot,
        "init")
    voted = cleaner.cleaning(
        [noisyReads, noisyReads], shortToLongMap, toProcessList, bridgeRobot,
        "vote")
    in1List, in2List, out1List, out2List, commonList, longReadToUse = voted

    extendResult = extender.readExtender(
        in1List, in2List, out1List, out2List, commonList, bridgeRobot,
        longReadToUse, True)

    # (predecessor slot, successor slot) combinations to emit, in order.
    if extendResult == 0:
        pairing = ((0, 0), (1, 1))
    elif extendResult == 1:
        pairing = ((0, 1), (1, 0))
    else:
        pairing = ()

    canResolve = len(pairing) > 0
    kmerPairsList = []
    for prevSlot, nextSlot in pairing:
        prevEntry = currentNode.listOfPrevNodes[prevSlot]
        inKmerIndex = prevEntry[0].nodeIndexList[-(prevEntry[1] + 1)]

        nextEntry = currentNode.listOfNextNodes[nextSlot]
        outKmerIndex = nextEntry[0].nodeIndexList[nextEntry[1]]

        kmerPairsList.append([inKmerIndex, outKmerIndex])

    print(kmerPairsList)

    return canResolve, kmerPairsList
def indelMSABridging(f2, currentNode, noisyReads, p, snpRate, flankinglen,
                     parameterRobot):
    """Try to pair the predecessors and successors of ``currentNode``.

    A private parameter robot is configured (read and genome statistics are
    copied from ``parameterRobot``, thresholds are fixed here, ``snprate`` is
    ``snpRate``).  The reads are then gathered with ``formToProcessList`` /
    ``formRelatedMap``, cleaned with ``cleaner.cleaning`` ("init" pass, then
    "vote" pass) and handed to ``extender.readExtender``.

    The extender result selects the pairing:
        0     -> predecessor 0 with successor 0, predecessor 1 with successor 1
        1     -> predecessor 0 with successor 1, predecessor 1 with successor 0
        other -> unresolved, no pairs

    Each pair is ``[inKmerIndex, outKmerIndex]``: the in index is element
    ``-(edgeWt + 1)`` of the predecessor's ``nodeIndexList`` and the out
    index is element ``edgeWt`` of the successor's ``nodeIndexList``, with
    ``edgeWt`` read from the matching entry of ``listOfPrevNodes`` /
    ``listOfNextNodes``.

    The pair list is printed before returning.  ``p`` is not used.

    Returns:
        ``(canResolve, kmerPairsList)``; ``(False, [])`` is returned early
        when the fifth entry of the to-process list is empty.
    """
    # Parameter set used only for this bridging attempt.
    bridgeRobot = common.parameterRobot()
    bridgeRobot.defaultFolder = parameterRobot.defaultFolder
    bridgeRobot.setReadStat(
        Nshort=parameterRobot.N,
        Nlong=parameterRobot.N,
        Lshort=parameterRobot.L,
        Llong=parameterRobot.L,
        p=parameterRobot.p,
        longOnly=True)
    bridgeRobot.setGenomeStat(G=parameterRobot.G, lrep=500, lsnp=200, lint=50)
    bridgeRobot.setThresholdPara(
        liid=30, thresForRandom=0.5, thresForins=0.4, thresFordel=0.4,
        insMin=4, delMin=4, thresholdForSupport=0.15, subthreshold=9,
        editsub=-10, editins=-1, editdel=-1, editmatch=1, lookRange=15)
    # tunePara() is not called on this robot.
    bridgeRobot.snprate = snpRate

    # toProcessList : in1IndexList, in2IndexList, out1IndexList, out2IndexList, commonIndexList
    # shortToLongMap : indexlong, indexshort, jstart, jend, istart, iend
    toProcessList = formToProcessList(
        f2, noisyReads, currentNode, bridgeRobot, flankinglen)
    if len(toProcessList[4]) == 0:
        return False, []

    shortToLongMap = formRelatedMap(
        f2, noisyReads, currentNode, bridgeRobot, toProcessList)

    # Two passes over the same reads: "init" first, then "vote".
    cleaner.cleaning(
        [noisyReads, noisyReads], shortToLongMap, toProcessList, bridgeRobot,
        "init")
    voted = cleaner.cleaning(
        [noisyReads, noisyReads], shortToLongMap, toProcessList, bridgeRobot,
        "vote")
    in1List, in2List, out1List, out2List, commonList, longReadToUse = voted

    extendResult = extender.readExtender(
        in1List, in2List, out1List, out2List, commonList, bridgeRobot,
        longReadToUse, True)

    # (predecessor slot, successor slot) combinations to emit, in order.
    if extendResult == 0:
        pairing = ((0, 0), (1, 1))
    elif extendResult == 1:
        pairing = ((0, 1), (1, 0))
    else:
        pairing = ()

    canResolve = len(pairing) > 0
    kmerPairsList = []
    for prevSlot, nextSlot in pairing:
        prevEntry = currentNode.listOfPrevNodes[prevSlot]
        inKmerIndex = prevEntry[0].nodeIndexList[-(prevEntry[1] + 1)]

        nextEntry = currentNode.listOfNextNodes[nextSlot]
        outKmerIndex = nextEntry[0].nodeIndexList[nextEntry[1]]

        kmerPairsList.append([inKmerIndex, outKmerIndex])

    print(kmerPairsList)

    return canResolve, kmerPairsList