示例#1
0
def adjListToRepeatList(newAdjacencyList, folderName, repeatFilename):
    """Cluster an adjacency list into repeat groups and save them as JSON.

    Each index ``i`` of ``newAdjacencyList`` is represented by two node ids
    in an auxiliary graph: the even id ``2 * i`` when ``i`` is the source of
    an edge and the odd id ``2 * j + 1`` when ``j`` is the target.  Every
    cluster returned by ``findConnectedComponents()`` is split into its even
    and odd node ids, and the resulting ``[evenIds, oddIds]`` pairs are
    written to ``folderName + repeatFilename``.

    Args:
        newAdjacencyList: sequence whose item ``i`` is an iterable of the
            indices adjacent to ``i``.
        folderName: path prefix; it is concatenated directly with
            ``repeatFilename`` (no separator is added here).
        repeatFilename: name of the JSON file to write.

    Raises:
        AssertionError: if the data read back from the file differs from
            what was written.
    """
    N1 = len(newAdjacencyList)

    G2 = abunGraphLib.seqGraphWt(N1 * 2)
    for i in range(N1):
        for j in newAdjacencyList[i]:
            # The edge is inserted in both directions.
            G2.insertEdge(2 * i, 2 * j + 1, 1)
            G2.insertEdge(2 * j + 1, 2 * i, 1)

    clusters = G2.findConnectedComponents()

    repeatList = []
    for eachitem in clusters:
        leftList, rightList = [], []
        for eachsubitem in eachitem:
            if eachsubitem % 2 == 0:
                leftList.append(eachsubitem)
            else:
                rightList.append(eachsubitem)

        repeatList.append([
            abunHouseKeeper.getDistinct(leftList),
            abunHouseKeeper.getDistinct(rightList)
        ])

    with open(folderName + repeatFilename, 'w') as outfile:
        json.dump(repeatList, outfile)

    # Round-trip sanity check.  The handle is now closed deterministically
    # instead of being left to the garbage collector.
    with open(folderName + repeatFilename, 'r') as json_data:
        loadData = json.load(json_data)

    assert (loadData == repeatList)
示例#2
0
def formConfirmReadResolve(folderName, inList, outList, G, Grev, N1):
    #print "formConfirmReadResolve"

    resolvedList = []
    confirmingReadList = []
    brLFlankList = []
    brRFlankList = []

    ### Find possible candidate reads
    print "inList , outList formConfirmReadResolve()", inList, outList
    for eachin in inList:
        for eachout in outList:
            pathList = abunGraphLib.findAllPathK(eachin, eachout, G, 3)
            for path in pathList:
                if len(path) == 3 and path[1] >= N1:
                    R = path[1]
                    confirmingReadList.append(R)
                    brLFlankList.append([eachin, R])
                    brRFlankList.append([eachout, R])

    ### Filter simple false cases
    toUseReadDic = {}
    confirmingReadList.sort()
    for key, items in groupby(confirmingReadList):
        toUseReadDic[str(key)] = True

    newbrLFlankList = abunHouseKeeper.getDistinct(brLFlankList)
    newbrLFlankList.sort(key=itemgetter(1))

    for key, items in groupby(newbrLFlankList, itemgetter(1)):
        mylist = list(items)
        if len(mylist) > 1:
            toUseReadDic[str(key)] = False

    newbrRFlankList = abunHouseKeeper.getDistinct(brRFlankList)
    newbrRFlankList.sort(key=itemgetter(1))

    for key, items in groupby(newbrRFlankList, itemgetter(1)):
        mylist = list(items)
        if len(mylist) > 1:
            toUseReadDic[str(key)] = False

    finalSearchReadList = []
    for eachitem in toUseReadDic:
        if toUseReadDic[eachitem] == True:
            finalSearchReadList.append(int(eachitem))

    ### Check paths to confirm all false cases
    for eachR in finalSearchReadList:
        l1 = abunGraphLib.findAllReachable(eachR, N1, G)
        l2 = abunGraphLib.findAllReachable(eachR, N1, Grev)

        l1Distinct = abunHouseKeeper.getDistinct(l1)
        l2Distinct = abunHouseKeeper.getDistinct(l2)

        if len(l1Distinct) == 1 and len(l2Distinct) == 1:
            c1, c2 = l1Distinct[0], l2Distinct[0]
            resolvedList.append([c2, c1])

    return resolvedList
示例#3
0
def adjListToRepeatList(newAdjacencyList, folderName, repeatFilename):
    """Group an adjacency list into repeat clusters and dump them as JSON.

    An auxiliary graph with ``2 * len(newAdjacencyList)`` nodes is built in
    which an edge ``i -> j`` of the adjacency list links node ``2 * i`` with
    node ``2 * j + 1`` (in both directions).  Each cluster reported by
    ``findConnectedComponents()`` is stored as ``[evenIds, oddIds]``, and
    the list of all such pairs is written to ``folderName + repeatFilename``.

    Args:
        newAdjacencyList: sequence whose item ``i`` lists the indices
            adjacent to ``i``.
        folderName: path prefix, joined to ``repeatFilename`` by plain
            string concatenation.
        repeatFilename: name of the output JSON file.

    Raises:
        AssertionError: if re-loading the written file does not give back
            the same list.
    """
    N1 = len(newAdjacencyList)

    G2 = abunGraphLib.seqGraphWt(N1 * 2)
    for i in range(N1):
        for j in newAdjacencyList[i]:
            G2.insertEdge(2 * i, 2 * j + 1, 1)
            G2.insertEdge(2 * j + 1, 2 * i, 1)

    clusters = G2.findConnectedComponents()

    repeatList = []
    for eachitem in clusters:
        leftList, rightList = [], []
        for eachsubitem in eachitem:
            # Even ids go to the left list, odd ids to the right list.
            if eachsubitem % 2 == 0:
                leftList.append(eachsubitem)
            else:
                rightList.append(eachsubitem)

        repeatList.append([abunHouseKeeper.getDistinct(leftList),
                           abunHouseKeeper.getDistinct(rightList)])

    outputPath = folderName + repeatFilename
    with open(outputPath, 'w') as outfile:
        json.dump(repeatList, outfile)

    # Verify the file round-trips; 'with' closes the handle that the
    # previous bare open() call leaked.
    with open(outputPath, 'r') as infile:
        loadData = json.load(infile)

    assert(loadData == repeatList)
示例#4
0
def findAttachedReads(x, side, folderName, sortedContigList, sortedContigDic,
                      lenDicContig, lenDicRead):
    """Return the distinct read tags whose records overlap contig ``x``.

    ``x`` is converted to a lookup key with
    ``abunHouseKeeper.parseIDToName(x, 'C', 0)``.  ``sortedContigDic[key]``
    gives the index in ``sortedContigList`` where scanning starts; the scan
    continues while the second-to-last field of the record equals the key
    (presumably the records of one contig are stored contiguously -- verify
    against the caller).  Records accepted by ``overlapCR`` contribute their
    last field to the result.  ``folderName`` is not used.

    Record format:
      [S1]     [E1]  |     [S2]     [E2]  |  [LEN 1]  [LEN 2]  |  [ IDY]  | [TAGS]
    =====================================================================================
       1      562  |      819     1418  |      562      600  |    84.72  | Contig0_d  Read121_d
       1      562  |     4077     3478  |      562      600  |    84.72  | Contig0_d  Read121_p
       1      564  |      656       68  |      564      589  |    90.13  | Contig0_d  Read382_d
       1      564  |     6996     7584  |      564      589  |    90.13  | Contig0_d  Read382_p
       1      571  |     1386      815  |      571      572  |    86.60  | Contig0_d  Read421_d

    Returns:
        list of distinct read tags, or ``[]`` when the key is unknown.
    """
    thres = thresMiddleContig

    key = abunHouseKeeper.parseIDToName(x, 'C', 0)
    if key not in sortedContigDic:
        return []

    attached = []
    idx = sortedContigDic[key]
    while idx < len(sortedContigList):
        record = sortedContigList[idx]
        if record[-2] != key:
            # Left the run of records that belong to this key.
            break
        if overlapCR(record, side, thres, lenDicContig, lenDicRead):
            attached.append(record[-1])
        idx += 1

    return abunHouseKeeper.getDistinct(attached)
示例#5
0
def filterConfidResolve(resolvedList):
    newResolvedList = []
    resolvedList.sort()
    conThres = abunHouseKeeper.abunGlobalSplitParameterRobot.BRThres
    print "conThres", conThres

    for key, items in groupby(resolvedList):
        tmpList = list(items)
        if len(tmpList) >= conThres:
            newResolvedList.append(key)

    if False:
        noConflict = resolveConflict(abunHouseKeeper.getDistinct(resolvedList))
        noConflict = abunHouseKeeper.getDistinct(noConflict)
        newResolvedList = abunHouseKeeper.getDistinct(newResolvedList)
        newResolvedList = abunHouseKeeper.getDistinct(intersect(newResolvedList, noConflict))

    return newResolvedList
示例#6
0
def resolveConflictX(listA, listB):
    resolvedList = [[] for i in range(len(listA))]
    print "len(listA), len(listB)", len(listA), len(listB)
    for i in range(len(listA)):
        combinedList = listA[i] + listB[i]
        newCombinedList = abunHouseKeeper.getDistinct(combinedList)
        tmpResolved = resolveConflict(newCombinedList)
        resolvedList[i] = tmpResolved

    return resolvedList
示例#7
0
def filterConfidResolve(resolvedList):
    newResolvedList = []
    resolvedList.sort()
    conThres = abunHouseKeeper.abunGlobalSplitParameterRobot.BRThres
    print "conThres", conThres

    for key, items in groupby(resolvedList):
        tmpList = list(items)
        if len(tmpList) >= conThres:
            newResolvedList.append(key)

    if False:
        noConflict = resolveConflict(abunHouseKeeper.getDistinct(resolvedList))
        noConflict = abunHouseKeeper.getDistinct(noConflict)
        newResolvedList = abunHouseKeeper.getDistinct(newResolvedList)
        newResolvedList = abunHouseKeeper.getDistinct(
            intersect(newResolvedList, noConflict))

    return newResolvedList
示例#8
0
def resolveConflictX(listA, listB):
    resolvedList = [[] for i in range(len(listA))]
    print "len(listA), len(listB)", len(listA), len(listB)
    for i in range(len(listA)):
        combinedList = listA[i] + listB[i]
        newCombinedList = abunHouseKeeper.getDistinct(combinedList)
        tmpResolved = resolveConflict(newCombinedList)
        resolvedList[i] = tmpResolved

    return resolvedList
def findAttachedContigs(rList, side, folderName, sortedReadList, sortedReadDic, lenDicContig, lenDicRead):
    """Return distinct ``[contigTag, read]`` pairs for the reads in ``rList``.

    For each read found in ``sortedReadDic`` the scan starts at the stored
    index of ``sortedReadList`` and continues while the last field of the
    record equals the read.  Records accepted by ``overlapCRJustREnd``
    contribute ``[record[-2], read]``.  ``folderName`` is not used.
    """
    thres = thresMiddleContig

    pairs = []
    for read in rList:
        if read not in sortedReadDic:
            continue

        idx = sortedReadDic[read]
        while idx < len(sortedReadList):
            record = sortedReadList[idx]
            if record[-1] != read:
                # End of the run of records for this read.
                break
            if overlapCRJustREnd(record, side, thres, lenDicContig, lenDicRead):
                pairs.append([record[-2], read])
            idx += 1

    return abunHouseKeeper.getDistinct(pairs)
示例#10
0
def findAttachedContigs(rList, side, folderName, sortedReadList, sortedReadDic,
                        lenDicContig, lenDicRead):
    """Collect the contigs attached to each read of ``rList``.

    ``sortedReadDic[read]`` is the index in ``sortedReadList`` where the
    scan for that read begins; consecutive records whose last field is the
    read are tested with ``overlapCRJustREnd`` and, when accepted, recorded
    as ``[secondToLastField, read]``.  Reads absent from ``sortedReadDic``
    are skipped.  ``folderName`` is not used.

    Returns:
        the collected pairs after ``abunHouseKeeper.getDistinct``.
    """
    thres = thresMiddleContig
    attached = []

    knownReads = (name for name in rList if name in sortedReadDic)
    for name in knownReads:
        position = sortedReadDic[name]
        while (position < len(sortedReadList)
               and sortedReadList[position][-1] == name):
            hit = sortedReadList[position]
            position += 1
            if overlapCRJustREnd(hit, side, thres, lenDicContig, lenDicRead):
                attached.append([hit[-2], name])

    return abunHouseKeeper.getDistinct(attached)
示例#11
0
def findAttachedReads(x, side, folderName,sortedContigList,sortedContigDic, lenDicContig,lenDicRead):
    """Find the reads attached to contig ``x``.

    The contig id is turned into a key by
    ``abunHouseKeeper.parseIDToName(x, 'C', 0)``.  Starting at
    ``sortedContigDic[key]``, consecutive records of ``sortedContigList``
    whose second-to-last field equals the key are tested with ``overlapCR``;
    the last field of each accepted record is collected.  ``folderName`` is
    not used.

    Record format:
      [S1]     [E1]  |     [S2]     [E2]  |  [LEN 1]  [LEN 2]  |  [ IDY]  | [TAGS]
    =====================================================================================
       1      562  |      819     1418  |      562      600  |    84.72  | Contig0_d  Read121_d
       1      562  |     4077     3478  |      562      600  |    84.72  | Contig0_d  Read121_p
       1      564  |      656       68  |      564      589  |    90.13  | Contig0_d  Read382_d
       1      564  |     6996     7584  |      564      589  |    90.13  | Contig0_d  Read382_p
       1      571  |     1386      815  |      571      572  |    86.60  | Contig0_d  Read421_d

    Returns:
        the collected tags after ``abunHouseKeeper.getDistinct``, or an
        empty list when the key is not in ``sortedContigDic``.
    """
    thres = thresMiddleContig

    contigName = abunHouseKeeper.parseIDToName(x, 'C', 0)
    readNames = []

    if contigName in sortedContigDic:
        position = sortedContigDic[contigName]
        while (position < len(sortedContigList)
               and sortedContigList[position][-2] == contigName):
            hit = sortedContigList[position]
            position += 1
            if overlapCR(hit, side, thres, lenDicContig, lenDicRead):
                readNames.append(hit[-1])

        readNames = abunHouseKeeper.getDistinct(readNames)

    return readNames
示例#12
0
def abunSplitWithXResolve(folderName, mummerLink, myCountDic, contigReadGraph,
                          contigFilename, readsetFilename):
    """Build the contig reachability graph and report its edges.

    Only the first ("Debug") stage is live: it loads the graph stored under
    ``contigReadGraph``, computes ``abunGraphLib.findAllReachable`` for each
    of the ``N1 = 2 * len(myCountDic)`` nodes, copies the result into a new
    ``graphLib.seqGraph`` and calls ``reportEdge()`` on it.

    The four following stages are wrapped in ``if False:`` and never run.
    They hand data to each other through JSON/graph files in ``folderName``
    (``resolvedList.json``, ``mapDummyToRealDic.json``,
    ``gapContentLookUpDic.json``, ``xResolvedGraph``).

    The function has no return statement, so it returns ``None``.
    """
    N1 = len(myCountDic) * 2
    print "N1", N1

    # Debug
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)

    # adj[i] is replaced below by the result of findAllReachable for node i.
    adj = [[] for i in range(N1)]

    for i in range(N1):
        adj[i] = abunGraphLib.findAllReachable(i, N1, G)

    Gnew = graphLib.seqGraph(N1)

    # Copy every reachability relation into Gnew (third argument is 1).
    for i in range(N1):
        for j in adj[i]:
            Gnew.insertEdge(i, j, 1)

    Gnew.reportEdge()
    # End Debug

    # Disabled stage 1: resolve the repeat pairs from phaseRepeat.txt and the
    # X-nodes, then store the merged result.
    # NOTE(review): json_data is opened without being closed.
    if False:
        json_data = open(folderName + "phaseRepeat.txt", 'r')
        repeatPairs = json.load(json_data)
        repeatPairs = obtainNonEmpty(repeatPairs)

        biResolvedCombineList = []
        for eachitem in repeatPairs:
            inList, outList = eachitem[0], eachitem[1]
            resolvedList = determindMatch(inList, outList, myCountDic,
                                          folderName, contigReadGraph, N1)

            biResolvedCombineList += resolvedList

        ### Xnode repeatResolution
        xResolvedList, mapDummyToRealDic = xNodeResolving(
            folderName, contigReadGraph)

        ### Combine resolution
        resolvedList = xResolvedList + biResolvedCombineList
        resolvedList = abunHouseKeeper.getDistinct(resolvedList)
        print "resolvedList, len(resolvedList),len(xResolvedList), len(biResolvedCombineList) ", resolvedList, len(
            resolvedList), len(xResolvedList), len(biResolvedCombineList)

        with open(folderName + "resolvedList.json", 'w') as f:
            json.dump(resolvedList, f)

        with open(folderName + "mapDummyToRealDic.json", 'w') as f:
            json.dump(mapDummyToRealDic, f)

    # Disabled stage 2: build the gap-content lookup from the stored
    # resolution and write it to gapContentLookUpDic.json.
    if False:
        json_data = open(folderName + "resolvedList.json", 'r')
        resolvedList = json.load(json_data)

        json_data = open(folderName + "mapDummyToRealDic.json", 'r')
        mapDummyToRealDic = json.load(json_data)

        gapContentLookUpList = []
        gapContentLookUpList = generateGapContentLookup(
            folderName, mummerLink, resolvedList, contigReadGraph,
            contigFilename, readsetFilename, mapDummyToRealDic)
        gapContentLookUpDic = {}
        gapContentLookUpList.sort()

        # Key is "<item0>_<item1>"; value keeps items 2, 3 and 4.
        for eachitem in gapContentLookUpList:
            gapContentLookUpDic[str(eachitem[0]) + "_" + str(eachitem[1])] = [
                eachitem[2], eachitem[3], eachitem[4]
            ]
            print eachitem[2:4], len(eachitem[4])

        with open(folderName + "gapContentLookUpDic.json", 'w') as f:
            json.dump(gapContentLookUpDic, f)

    # Disabled stage 3: build a graph from the resolved edges, condense it
    # and save it as "xResolvedGraph".
    if False:
        json_data = open(folderName + "resolvedList.json", 'r')
        resolvedList = json.load(json_data)

        json_data = open(folderName + "mapDummyToRealDic.json", 'r')
        mapDummyToRealDic = json.load(json_data)

        # Graph size is N1 plus one node per entry of mapDummyToRealDic.
        G = graphLib.seqGraph(N1 + len(mapDummyToRealDic))
        addEdges(G, resolvedList)
        G.condense()

        G.saveToFile(folderName, "xResolvedGraph")

    # Disabled stage 4: append the dummy segments to a copy of the doubled
    # contig FASTA and extract the final contigs.
    if False:
        json_data = open(folderName + "mapDummyToRealDic.json", 'r')
        mapDummyToRealDic = json.load(json_data)

        G = graphLib.seqGraph(0)
        G.loadFromFile(folderName, "xResolvedGraph")

        json_data = open(folderName + "gapContentLookUpDic.json", 'r')
        gapContentLookUpDic = json.load(json_data)

        print "Final step: really hacking a file"
        # NOTE(review): shell command assembled by string concatenation;
        # paths containing spaces or shell metacharacters would break it.
        os.system("cp " + folderName + contigFilename + "_Double.fasta " +
                  folderName + "tmpWithDummy.fasta")
        contigList = IORobot.readContigsFromFile(
            folderName, contigFilename + "_Double.fasta")

        f = open(folderName + "tmpWithDummy.fasta", 'a')
        for i in range(len(mapDummyToRealDic)):
            # NOTE(review): 'id' shadows the builtin of the same name.
            id = mapDummyToRealDic[str(i)]
            f.write(">SegDum" + str(i) + "\n")
            f.write(contigList[id] + "\n")
        f.close()

        IORobot.extractGraphToContigs(G, folderName, mummerLink, "abun.fasta",
                                      "tmpWithDummy.fasta",
                                      gapContentLookUpDic, mapDummyToRealDic)
示例#13
0
def identifyRepeat(folderName, mummerLink,contigFilename,contigReadGraph, repeatFilename, optionToRun  ):
    '''
    Input : Graph --- phaseStringGraph1
    Output: repeat pairs { [ (1,2), (3,4) ] , [(5,6),(7,8)] } 
    Algorithm: 
        a) Reachability test on the graph to find the partners
        b) Form Bipartite graph
        c) Find connected component in the bipartite and define as repeat pairs

    '''
    
    # ## (a) reachability test to find partners 
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)
    # G.reportEdge()
    lenDicCC = IORobot.obtainLength(folderName, contigFilename+"_Double.fasta")
    
    adjacencyList = [[] for i in range(len(lenDicCC))]
    
    N1 = len(lenDicCC)
    
    
    # # Debug
    # for i in range(14):
    #    debugGraphPath(i, 2, G, N1)
    # # End Debug
    
    for i in range(len(lenDicCC)):
        adjacencyList[i] = abunGraphLib.findAllReachable(i, N1, G) 
        print "i, adjacencyList[i] : ", i , adjacencyList[i]
    
    # ## (b) formation of bipartite graph
    if optionToRun == "tandem" :
        newAdjacencyList = adjacencyList
    elif optionToRun == "xphase": 
        newAdjacencyList = abunGraphLib.filterEdge(adjacencyList, folderName, contigFilename)
    
    G2 = abunGraphLib.seqGraphWt(N1 * 2)
    for i in range(N1):
        for j in newAdjacencyList[i]:
            G2.insertEdge(2 * i, 2 * j + 1, 1)
            G2.insertEdge(2 * j + 1, 2 * i, 1)

    clusters = G2.findConnectedComponents()
    
    repeatList = []
    for eachitem in clusters:
        leftList, rightList = [], []
        for eachsubitem in eachitem:
            if eachsubitem % 2 == 0 :
                leftList.append(eachsubitem)
            else:
                rightList.append(eachsubitem)
                
        
        repeatList.append([abunHouseKeeper.getDistinct(leftList), abunHouseKeeper.getDistinct(rightList)])
           
    with open(folderName + repeatFilename, 'w') as outfile:
        json.dump(repeatList, outfile)

    
    json_data = open(folderName + repeatFilename, 'r')
    loadData = json.load(json_data)
    
    
    assert(loadData == repeatList)
    
示例#14
0
def identifyRepeat(folderName, mummerLink, contigFilename, contigReadGraph,
                   repeatFilename, optionToRun):
    '''
    Input : Graph --- phaseStringGraph1
    Output: repeat pairs { [ (1,2), (3,4) ] , [(5,6),(7,8)] } 
    Algorithm: 
        a) Reachability test on the graph to find the partners
        b) Form Bipartite graph
        c) Find connected component in the bipartite and define as repeat pairs

    '''

    # ## (a) reachability test to find partners
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)
    # G.reportEdge()
    lenDicCC = IORobot.obtainLength(folderName,
                                    contigFilename + "_Double.fasta")

    adjacencyList = [[] for i in range(len(lenDicCC))]

    N1 = len(lenDicCC)

    # # Debug
    # for i in range(14):
    #    debugGraphPath(i, 2, G, N1)
    # # End Debug

    for i in range(len(lenDicCC)):
        adjacencyList[i] = abunGraphLib.findAllReachable(i, N1, G)
        print "i, adjacencyList[i] : ", i, adjacencyList[i]

    # ## (b) formation of bipartite graph
    if optionToRun == "tandem":
        newAdjacencyList = adjacencyList
    elif optionToRun == "xphase":
        newAdjacencyList = abunGraphLib.filterEdge(adjacencyList, folderName,
                                                   contigFilename)

    G2 = abunGraphLib.seqGraphWt(N1 * 2)
    for i in range(N1):
        for j in newAdjacencyList[i]:
            G2.insertEdge(2 * i, 2 * j + 1, 1)
            G2.insertEdge(2 * j + 1, 2 * i, 1)

    clusters = G2.findConnectedComponents()

    repeatList = []
    for eachitem in clusters:
        leftList, rightList = [], []
        for eachsubitem in eachitem:
            if eachsubitem % 2 == 0:
                leftList.append(eachsubitem)
            else:
                rightList.append(eachsubitem)

        repeatList.append([
            abunHouseKeeper.getDistinct(leftList),
            abunHouseKeeper.getDistinct(rightList)
        ])

    with open(folderName + repeatFilename, 'w') as outfile:
        json.dump(repeatList, outfile)

    json_data = open(folderName + repeatFilename, 'r')
    loadData = json.load(json_data)

    assert (loadData == repeatList)
示例#15
0
def BResolution(Gnew, folderName, contigReadGraph, N1, myCountDic, lenDic):

    if abunHouseKeeper.abunGlobalSplitParameterRobot.runBResolve:
        print "abunHouseKeeper.abunGlobalSplitParameterRobot.runBResolve", abunHouseKeeper.abunGlobalSplitParameterRobot.runBResolve
        maxRThres = abunHouseKeeper.abunGlobalSplitParameterRobot.RThres

        repeatFinder.adjListToRepeatList(Gnew.adj, folderName, "phaseRepeatTR.txt")

        json_data = open(folderName + "phaseRepeatTR.txt", "r")
        repeatPairs = json.load(json_data)

        repeatPairs = obtainNonEmpty(repeatPairs)

        biResolvedCombineList = []

        G = graphLib.seqGraph(0)
        G.loadFromFile(folderName, contigReadGraph)

        Grev = abunGraphLib.formReverseGraphFast(G)

        abunAnalysisList = []

        for eachitem in repeatPairs:
            inList, outList = eachitem[0], eachitem[1]
            resolvedList, brResolvedList = [], []

            if abunHouseKeeper.abunGlobalSplitParameterRobot.toRunAbunB:
                if abunHouseKeeper.abunGlobalSplitParameterRobot.AbunLowerB > 0:
                    abunHouseKeeper.abunGlobalSplitParameterRobot.AbunLower = (
                        abunHouseKeeper.abunGlobalSplitParameterRobot.AbunLowerB
                    )

                if abunHouseKeeper.abunGlobalSplitParameterRobot.AbunUpperB > 0:
                    abunHouseKeeper.abunGlobalSplitParameterRobot.AbunUpper = (
                        abunHouseKeeper.abunGlobalSplitParameterRobot.AbunUpperB
                    )

                if not abunHouseKeeper.abunGlobalSplitParameterRobot.toRunAggB:
                    resolvedList = determindMatch(inList, outList, myCountDic, folderName, contigReadGraph, N1)
                else:

                    resolvedList = determindMatchAggregate(
                        inList, outList, myCountDic, folderName, contigReadGraph, N1, Gnew, lenDic
                    )

            if abunHouseKeeper.abunGlobalSplitParameterRobot.toRunBRB:
                if abunHouseKeeper.abunGlobalSplitParameterRobot.BRThresB > 0:
                    abunHouseKeeper.abunGlobalSplitParameterRobot.BRThres = (
                        abunHouseKeeper.abunGlobalSplitParameterRobot.BRThresB
                    )

                brResolvedList = formBRReolve(folderName, inList, outList, G, Grev, True, N1)

            combinedList = abunHouseKeeper.getDistinct(resolvedList + brResolvedList)

            print "resolvedList, brResolvedList, inList, outList", resolvedList, brResolvedList, inList, outList

            print "resolveConflict(combinedList)", resolveConflict(combinedList)

            abunAnalysisList.append([inList, outList, resolvedList, brResolvedList, resolveConflict(combinedList)])
            if len(inList) <= maxRThres and len(outList) <= maxRThres and len(inList) > 0 and len(outList) > 0:
                # biResolvedCombineList += resolveConflict(combinedList)

                resolvedCombine = resolveConflict(combinedList)
                ### kkdebug
                Gnew.bipartiteLocalResolve(resolvedCombine, inList, outList, folderName)

        # json_data = open(folderName + "hackBRResolveList.json", 'r')
        # dataItem = json.load(json_data)
        # Gnew.bipartiteResolve(dataItem)

        ### end kkdebug
        Gnew.condense()

        with open(folderName + "biResolvedCombineList.json", "w") as f:
            json.dump(biResolvedCombineList, f)

        with open(folderName + "abunAnalysisList.json", "w") as f:
            json.dump(abunAnalysisList, f)

        # assert(1==2)

        return Gnew

    else:
        return Gnew
示例#16
0
def abunSplitWithXResolve(folderName, mummerLink, myCountDic, contigReadGraph, contigFilename, readsetFilename):
    """Report the reachability graph between the ``2 * len(myCountDic)`` nodes.

    The live part loads the graph named ``contigReadGraph`` from
    ``folderName``, runs ``abunGraphLib.findAllReachable`` for every node
    index below ``N1``, inserts the resulting edges into a fresh
    ``graphLib.seqGraph(N1)`` and calls its ``reportEdge()``.

    Everything after that sits inside ``if False:`` blocks and is never
    executed.  Those blocks form a file-based pipeline (resolution ->
    gap lookup -> condensed graph -> FASTA extraction) and are kept as
    written.

    Returns ``None`` (there is no return statement).
    """
    N1 = len(myCountDic) * 2
    print "N1", N1

    # Debug
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)

    # Placeholder lists; each slot is overwritten in the loop below.
    adj = [[] for i in range(N1)]

    for i in range(N1):
        adj[i] = abunGraphLib.findAllReachable(i, N1, G)

    Gnew = graphLib.seqGraph(N1)

    # One edge i -> j per reachable node j (third argument is 1).
    for i in range(N1):
        for j in adj[i]:
            Gnew.insertEdge(i, j, 1)

    Gnew.reportEdge()
    # End Debug

    # Disabled: combine the per-pair matches with the X-node resolution and
    # store resolvedList.json / mapDummyToRealDic.json.
    # NOTE(review): the handle from open() is not closed.
    if False:
        json_data = open(folderName + "phaseRepeat.txt", "r")
        repeatPairs = json.load(json_data)
        repeatPairs = obtainNonEmpty(repeatPairs)

        biResolvedCombineList = []
        for eachitem in repeatPairs:
            inList, outList = eachitem[0], eachitem[1]
            resolvedList = determindMatch(inList, outList, myCountDic, folderName, contigReadGraph, N1)

            biResolvedCombineList += resolvedList

        ### Xnode repeatResolution
        xResolvedList, mapDummyToRealDic = xNodeResolving(folderName, contigReadGraph)

        ### Combine resolution
        resolvedList = xResolvedList + biResolvedCombineList
        resolvedList = abunHouseKeeper.getDistinct(resolvedList)
        print "resolvedList, len(resolvedList),len(xResolvedList), len(biResolvedCombineList) ", resolvedList, len(
            resolvedList
        ), len(xResolvedList), len(biResolvedCombineList)

        with open(folderName + "resolvedList.json", "w") as f:
            json.dump(resolvedList, f)

        with open(folderName + "mapDummyToRealDic.json", "w") as f:
            json.dump(mapDummyToRealDic, f)

    # Disabled: derive the gap-content lookup and store it as
    # gapContentLookUpDic.json.
    if False:
        json_data = open(folderName + "resolvedList.json", "r")
        resolvedList = json.load(json_data)

        json_data = open(folderName + "mapDummyToRealDic.json", "r")
        mapDummyToRealDic = json.load(json_data)

        gapContentLookUpList = []
        gapContentLookUpList = generateGapContentLookup(
            folderName, mummerLink, resolvedList, contigReadGraph, contigFilename, readsetFilename, mapDummyToRealDic
        )
        gapContentLookUpDic = {}
        gapContentLookUpList.sort()

        # Key "<item0>_<item1>" maps to [item2, item3, item4].
        for eachitem in gapContentLookUpList:
            gapContentLookUpDic[str(eachitem[0]) + "_" + str(eachitem[1])] = [eachitem[2], eachitem[3], eachitem[4]]
            print eachitem[2:4], len(eachitem[4])

        with open(folderName + "gapContentLookUpDic.json", "w") as f:
            json.dump(gapContentLookUpDic, f)

    # Disabled: build, condense and save the resolved graph.
    if False:
        json_data = open(folderName + "resolvedList.json", "r")
        resolvedList = json.load(json_data)

        json_data = open(folderName + "mapDummyToRealDic.json", "r")
        mapDummyToRealDic = json.load(json_data)

        # N1 nodes plus one per entry of mapDummyToRealDic.
        G = graphLib.seqGraph(N1 + len(mapDummyToRealDic))
        addEdges(G, resolvedList)
        G.condense()

        G.saveToFile(folderName, "xResolvedGraph")

    # Disabled: append the dummy segments to a copy of the doubled contig
    # FASTA and extract the contigs into "abun.fasta".
    if False:
        json_data = open(folderName + "mapDummyToRealDic.json", "r")
        mapDummyToRealDic = json.load(json_data)

        G = graphLib.seqGraph(0)
        G.loadFromFile(folderName, "xResolvedGraph")

        json_data = open(folderName + "gapContentLookUpDic.json", "r")
        gapContentLookUpDic = json.load(json_data)

        print "Final step: really hacking a file"
        # NOTE(review): the cp command is built by string concatenation and
        # run through the shell; unusual characters in the paths break it.
        os.system("cp " + folderName + contigFilename + "_Double.fasta " + folderName + "tmpWithDummy.fasta")
        contigList = IORobot.readContigsFromFile(folderName, contigFilename + "_Double.fasta")

        f = open(folderName + "tmpWithDummy.fasta", "a")
        for i in range(len(mapDummyToRealDic)):
            # NOTE(review): 'id' shadows the builtin id().
            id = mapDummyToRealDic[str(i)]
            f.write(">SegDum" + str(i) + "\n")
            f.write(contigList[id] + "\n")
        f.close()

        IORobot.extractGraphToContigs(
            G, folderName, mummerLink, "abun.fasta", "tmpWithDummy.fasta", gapContentLookUpDic, mapDummyToRealDic
        )
示例#17
0
def formConfirmReadResolve(folderName, inList, outList, G, Grev, N1):
    # print "formConfirmReadResolve"

    resolvedList = []
    confirmingReadList = []
    brLFlankList = []
    brRFlankList = []

    ### Find possible candidate reads
    print "inList , outList formConfirmReadResolve()", inList, outList
    for eachin in inList:
        for eachout in outList:
            pathList = abunGraphLib.findAllPathK(eachin, eachout, G, 3)

            for path in pathList:

                if len(path) == 3 and path[1] >= N1:
                    R = path[1]
                    confirmingReadList.append(R)
                    brLFlankList.append([eachin, R])
                    brRFlankList.append([eachout, R])

    ### Filter simple false cases
    toUseReadDic = {}
    confirmingReadList.sort()
    for key, items in groupby(confirmingReadList):
        toUseReadDic[str(key)] = True

    newbrLFlankList = abunHouseKeeper.getDistinct(brLFlankList)
    newbrLFlankList.sort(key=itemgetter(1))

    for key, items in groupby(newbrLFlankList, itemgetter(1)):
        mylist = list(items)
        if len(mylist) > 1:
            toUseReadDic[str(key)] = False

    newbrRFlankList = abunHouseKeeper.getDistinct(brRFlankList)
    newbrRFlankList.sort(key=itemgetter(1))

    for key, items in groupby(newbrRFlankList, itemgetter(1)):
        mylist = list(items)
        if len(mylist) > 1:
            toUseReadDic[str(key)] = False

    finalSearchReadList = []
    for eachitem in toUseReadDic:
        if toUseReadDic[eachitem] == True:
            finalSearchReadList.append(int(eachitem))

    ### Check paths to confirm all false cases
    for eachR in finalSearchReadList:
        l1 = abunGraphLib.findAllReachable(eachR, N1, G)
        l2 = abunGraphLib.findAllReachable(eachR, N1, Grev)

        l1Distinct = abunHouseKeeper.getDistinct(l1)
        l2Distinct = abunHouseKeeper.getDistinct(l2)

        if len(l1Distinct) == 1 and len(l2Distinct) == 1:
            c1, c2 = l1Distinct[0], l2Distinct[0]
            resolvedList.append([c2, c1])

    return resolvedList
示例#18
0
def BResolution(Gnew, folderName, contigReadGraph, N1, myCountDic, lenDic,
                mummerLink):

    if abunHouseKeeper.abunGlobalSplitParameterRobot.runBResolve:
        print "abunHouseKeeper.abunGlobalSplitParameterRobot.runBResolve", abunHouseKeeper.abunGlobalSplitParameterRobot.runBResolve
        maxRThres = abunHouseKeeper.abunGlobalSplitParameterRobot.RThres

        repeatFinder.adjListToRepeatList(Gnew.adj, folderName,
                                         "phaseRepeatTR.txt")

        json_data = open(folderName + "phaseRepeatTR.txt", 'r')
        repeatPairs = json.load(json_data)

        repeatPairs = obtainNonEmpty(repeatPairs)

        biResolvedCombineList = []

        G = abunGraphLib.seqGraphWt(0)
        G.loadFromFile(folderName, contigReadGraph)

        Grev = abunGraphLib.formReverseGraphFast(G)

        abunAnalysisList = []

        for eachitem in repeatPairs:
            inList, outList = eachitem[0], eachitem[1]
            if not abunHouseKeeper.abunGlobalRunEM:
                resolvedList, brResolvedList = [], []

                if abunHouseKeeper.abunGlobalSplitParameterRobot.toRunAbunB:
                    if abunHouseKeeper.abunGlobalSplitParameterRobot.AbunLowerB > 0:
                        abunHouseKeeper.abunGlobalSplitParameterRobot.AbunLower = abunHouseKeeper.abunGlobalSplitParameterRobot.AbunLowerB

                    if abunHouseKeeper.abunGlobalSplitParameterRobot.AbunUpperB > 0:
                        abunHouseKeeper.abunGlobalSplitParameterRobot.AbunUpper = abunHouseKeeper.abunGlobalSplitParameterRobot.AbunUpperB

                    if not abunHouseKeeper.abunGlobalSplitParameterRobot.toRunAggB:
                        resolvedList = determindMatch(inList, outList,
                                                      myCountDic, folderName,
                                                      contigReadGraph, N1)
                    else:

                        resolvedList = determindMatchAggregate(
                            inList, outList, myCountDic, folderName,
                            contigReadGraph, N1, Gnew, lenDic)

                if abunHouseKeeper.abunGlobalSplitParameterRobot.toRunBRB:
                    if abunHouseKeeper.abunGlobalSplitParameterRobot.BRThresB > 0:
                        abunHouseKeeper.abunGlobalSplitParameterRobot.BRThres = abunHouseKeeper.abunGlobalSplitParameterRobot.BRThresB

                    brResolvedList = formBRReolve(folderName, inList, outList,
                                                  G, Grev, True, N1)

                combinedList = abunHouseKeeper.getDistinct(resolvedList +
                                                           brResolvedList)

                print "resolvedList, brResolvedList, inList, outList", resolvedList, brResolvedList, inList, outList

                print "resolveConflict(combinedList)", resolveConflict(
                    combinedList)

                abunAnalysisList.append([
                    inList, outList, resolvedList, brResolvedList,
                    resolveConflict(combinedList)
                ])
                if len(inList) <= maxRThres and len(
                        outList) <= maxRThres and len(inList) > 0 and len(
                            outList) > 0:
                    resolvedCombine = resolveConflict(combinedList)
                    Gnew.bipartiteLocalResolve(resolvedCombine, inList,
                                               outList, folderName)
            else:
                import emalgo
                resolvedCombine = emalgo.BResolvePreparation(
                    folderName, inList, outList, G, Grev, N1, mummerLink)
                Gnew.bipartiteLocalResolve(resolvedCombine, inList, outList,
                                           folderName)

        Gnew.condense()

        with open(folderName + "biResolvedCombineList.json", 'w') as f:
            json.dump(biResolvedCombineList, f)

        with open(folderName + "abunAnalysisList.json", 'w') as f:
            json.dump(abunAnalysisList, f)

        #assert(1==2)

        return Gnew

    else:
        return Gnew