Пример #1
0
def formResolveListForMatch(folderName, newInList, newOutList, sd, contigReadGraph, N1):
    '''
    Pair items of newInList with a partner from newOutList and keep the
    pairs for which a connecting path is found.

    For each item, satisfyMatch(item, newOutList, sd) is asked for a partner;
    a return value of -1 means "no partner" and the item is skipped.  A pair
    is kept only when abunGraphLib.findPathBtwEnds returns something other
    than None for (item[0], partner).

    Returns:
        A list of [item[0], partner] pairs, in the order of newInList.
        Empty when newInList is empty or nothing could be paired.
    '''
    resolvedList = []

    for eachitem in newInList:
        found = satisfyMatch(eachitem, newOutList, sd)

        # -1 is the "no match" sentinel of satisfyMatch.
        if found == -1:
            continue

        leftCtgIndex, rightCtgIndex = eachitem[0], found
        succReadsList = abunGraphLib.findPathBtwEnds(folderName, leftCtgIndex, rightCtgIndex, contigReadGraph, N1)

        # Identity test: None is a singleton, and "!=" can be overloaded by
        # whatever object the path finder returns.
        if succReadsList is not None:
            resolvedList.append([leftCtgIndex, rightCtgIndex])

    return resolvedList
Пример #2
0
def formResolveListForMatch(folderName, newInList, newOutList, sd,
                            contigReadGraph, N1):
    '''
    Build the list of (in, out) pairs that match and are joined by a path.

    Every entry of newInList is handed to satisfyMatch together with
    newOutList and sd.  When satisfyMatch answers with anything but -1, the
    answer is taken as the partner node, and abunGraphLib.findPathBtwEnds is
    queried for a path from entry[0] to that partner.  The pair is recorded
    only if that query does not return None.

    Returns:
        List of two-element lists [entry[0], partner], following the order
        of newInList; an empty list if nothing qualifies.
    '''
    resolvedList = []

    for eachitem in newInList:
        found = satisfyMatch(eachitem, newOutList, sd)

        if found != -1:
            leftCtgIndex, rightCtgIndex = eachitem[0], found
            succReadsList = abunGraphLib.findPathBtwEnds(
                folderName, leftCtgIndex, rightCtgIndex, contigReadGraph, N1)

            # Compare against None by identity rather than with "!=", which
            # a returned object could overload.
            if succReadsList is not None:
                resolvedList.append([leftCtgIndex, rightCtgIndex])

    return resolvedList
Пример #3
0
def xNodeResolving(folderName, contigReadGraph):

    ### Init G, myCountDic, N1
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)

    with open(folderName + "myCountDic.json") as f:
        myCountDic = json.load(f)

    N1 = len(myCountDic) * 2

    ### Add resolved edge

    adj = [[] for i in range(N1)]

    for i in range(N1):
        adj[i] = abunGraphLib.findAllReachable(i, N1, G)

    Gnew = graphLib.seqGraph(N1)

    for i in range(N1):
        for j in adj[i]:
            Gnew.insertEdge(i, j, 1)

    extraCounter = 0
    mapDummyToRealDic = {}
    resolvedList = []

    for v in Gnew.graphNodesList:

        inList = []
        for eachitem in v.listOfPrevNodes:
            inList.append(eachitem[0])

        outList = []
        for eachitem in v.listOfNextNodes:
            outList.append(eachitem[0])

        inListCt = getCtTwoToOne(inList, myCountDic)
        outListCt = getCtTwoToOne(outList, myCountDic)

        sizeList = []
        for eachitem in myCountDic:
            sizeList.append(myCountDic[eachitem])

        sd = np.std(sizeList)

        for eachIn in inListCt:
            matchedOut = satisfyMatch(eachIn, outListCt, sd)

            if matchedOut != -1:
                leftCtgIndex, rightCtgIndex = eachIn[0], v.nodeIndex
                inSuccReadsList = abunGraphLib.findPathBtwEnds(
                    folderName, leftCtgIndex, rightCtgIndex, contigReadGraph, N1
                )

                leftCtgIndex, rightCtgIndex = v.nodeIndex, matchedOut
                outSuccReadsList = abunGraphLib.findPathBtwEnds(
                    folderName, leftCtgIndex, rightCtgIndex, contigReadGraph, N1
                )

                if inSuccReadsList != None and outSuccReadsList != None:

                    resolvedList.append([eachIn[0]] + inSuccReadsList + [N1 + extraCounter])
                    print "in: ", resolvedList[-1]

                    resolvedList.append([N1 + extraCounter] + outSuccReadsList + [matchedOut])
                    print "out: ", resolvedList[-1]

                    mapDummyToRealDic[extraCounter] = v.nodeIndex
                    extraCounter = extraCounter + 1

    return resolvedList, mapDummyToRealDic
Пример #4
0
def singleGapLookUp(eachmatchpair, folderName, N1, mummerLink, contigReadGraph, contigFilename, readsetFilename):

    print eachmatchpair
    leftCtgIndex, rightCtgIndex, leftEnd, rightStart, middleContent = eachmatchpair[0], eachmatchpair[-1], 0, 0, ""

    succReadsList = abunGraphLib.findPathBtwEnds(folderName, leftCtgIndex, rightCtgIndex, contigReadGraph, N1)

    succReadsList = []
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)

    allPaths = abunGraphLib.findAllPathK(leftCtgIndex, rightCtgIndex, G, 5)
    # shuffle(allPaths)

    print "allPaths", allPaths

    possibleList = []
    for p in allPaths:
        noContig = True
        for pp in p[1:-1]:
            if pp < N1:
                noContig = False
        if noContig == True:
            possibleList.append(p)
    print "possibleList", possibleList

    minListLen = 1000
    for p in possibleList:
        if len(p) < minListLen:
            succReadsList = p
            minListLen = len(p)

    if len(succReadsList) > 0:
        succReadsList.pop(0)
        succReadsList.pop(-1)
    else:
        print "interesting item for future study"

    print "succReadsList", succReadsList

    if len(succReadsList) == 0:
        contigName = abunHouseKeeper.parseIDToName(leftCtgIndex, "C", N1)
        leftSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)

        contigName = abunHouseKeeper.parseIDToName(rightCtgIndex, "C", N1)
        rightSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)

        overlap = IORobot.alignWithName(
            leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex)
        )

        print "overlap contig : ", overlap

        leftEnd = len(leftSeg) - overlap[0]
        middleContent = ""

    else:

        contigName = abunHouseKeeper.parseIDToName(leftCtgIndex, "C", N1)
        print contigName
        leftSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)

        readName = abunHouseKeeper.parseIDToName(succReadsList[0], "R", N1)
        print readName
        rightSeg = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName)

        overlap = IORobot.alignWithName(
            leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex)
        )

        print "overlap start read : ", overlap

        leftEnd = len(leftSeg) - overlap[0]

        middleContent = ""

        for i in range(len(succReadsList) - 1):
            readName = abunHouseKeeper.parseIDToName(succReadsList[i], "R", N1)
            leftSeg = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName)

            readName = abunHouseKeeper.parseIDToName(succReadsList[i + 1], "R", N1)
            rightSeg = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName)

            overlap = IORobot.alignWithName(
                leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex)
            )
            print "overlap middle read : ", overlap
            middleContent = middleContent + leftSeg[0 : len(leftSeg) - overlap[0]]

        readName = abunHouseKeeper.parseIDToName(succReadsList[-1], "R", N1)
        leftSeg = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName)

        contigName = abunHouseKeeper.parseIDToName(rightCtgIndex, "C", N1)
        rightSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)

        overlap = IORobot.alignWithName(
            leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex)
        )
        print "overlap end read : ", overlap

        middleContent = middleContent + leftSeg[0 : len(leftSeg) - overlap[0]]

    return [leftCtgIndex, rightCtgIndex, leftEnd, rightStart, middleContent]
Пример #5
0
def xNodeResolving(folderName, contigReadGraph):
    '''
    Input : contigGraph , abunInfo , folderName  

    Output: myresolvedList.json, gapContentLookUp.json, dummyNodeMapping.json

    Algorithm :
        1) Tranverse the graph 
            a) If the node can well be fixed with sd requirement met 
                i) Link it across and add the pair into the myresolvedList, gapContentLookUp
                ii) Add dummynodes and fill in the dummyNodeMapping 
        
        2) Format return and output as temp file 
    '''

    ### Init G, myCountDic, N1
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)

    with open(folderName + 'myCountDic.json') as f:
        myCountDic = json.load(f)

    N1 = len(myCountDic) * 2

    ### Add resolved edge

    adj = [[] for i in range(N1)]

    for i in range(N1):
        adj[i] = abunGraphLib.findAllReachable(i, N1, G)

    Gnew = graphLib.seqGraph(N1)

    for i in range(N1):
        for j in adj[i]:
            Gnew.insertEdge(i, j, 1)

    extraCounter = 0
    mapDummyToRealDic = {}
    resolvedList = []

    for v in Gnew.graphNodesList:

        inList = []
        for eachitem in v.listOfPrevNodes:
            inList.append(eachitem[0])

        outList = []
        for eachitem in v.listOfNextNodes:
            outList.append(eachitem[0])

        inListCt = getCtTwoToOne(inList, myCountDic)
        outListCt = getCtTwoToOne(outList, myCountDic)

        sizeList = []
        for eachitem in myCountDic:
            sizeList.append(myCountDic[eachitem])

        sd = np.std(sizeList)

        for eachIn in inListCt:
            matchedOut = satisfyMatch(eachIn, outListCt, sd)

            if matchedOut != -1:
                leftCtgIndex, rightCtgIndex = eachIn[0], v.nodeIndex
                inSuccReadsList = abunGraphLib.findPathBtwEnds(
                    folderName, leftCtgIndex, rightCtgIndex, contigReadGraph,
                    N1)

                leftCtgIndex, rightCtgIndex = v.nodeIndex, matchedOut
                outSuccReadsList = abunGraphLib.findPathBtwEnds(
                    folderName, leftCtgIndex, rightCtgIndex, contigReadGraph,
                    N1)

                if inSuccReadsList != None and outSuccReadsList != None:

                    resolvedList.append([eachIn[0]] + inSuccReadsList +
                                        [N1 + extraCounter])
                    print "in: ", resolvedList[-1]

                    resolvedList.append([N1 + extraCounter] +
                                        outSuccReadsList + [matchedOut])
                    print "out: ", resolvedList[-1]

                    mapDummyToRealDic[extraCounter] = v.nodeIndex
                    extraCounter = extraCounter + 1

    return resolvedList, mapDummyToRealDic