Пример #1
0
def mainFlow(folderName, mummerLink, inputContigsFilename, inputReadsFilename, useSpades, noAlignment, scoreListOutputName, outputContigsFilename, mScoreThres, conScoreThres, setCoverOption):
    """Run the read-to-contig merge pipeline from header transformation to ranking.

    Steps, in order: transform the headers of both input files into
    "tmp"-prefixed working files, extract read-to-contig alignment data, find
    connecting and spanning reads, build the contig graph, derive potential
    merges, assign coverage to the graph, score the merges, and hand the
    result to ``rankingLib.rankAndMerge``.

    Args:
        folderName: Forwarded to every helper that touches files; presumably
            the working directory (confirm against the helper libraries).
        mummerLink: Forwarded to ``alignmentLib.extractRead2Contig``.
        inputContigsFilename: Contigs file name; "tmp" is prepended to form
            the working file name.
        inputReadsFilename: Reads file name; "tmp" is prepended to form the
            working file name.
        useSpades: When this compares equal to ``True`` coverage is assigned
            with ``cTestLib.assignCoverageFromHeader``; otherwise with
            ``cTestLib.assignCoverageFromDataList``.
        noAlignment: Forwarded to ``houseKeeperLib.transformFileHeaders`` and
            ``alignmentLib.extractRead2Contig``.
        scoreListOutputName: Forwarded to ``rankingLib.rankAndMerge``.
        outputContigsFilename: Forwarded to ``rankingLib.rankAndMerge``.
        mScoreThres: Forwarded to ``rankingLib.rankAndMerge``.
        conScoreThres: Forwarded to ``rankingLib.rankAndMerge``.
        setCoverOption: Forwarded to
            ``setCoverLib.extendConnectivityFromReads``.

    Returns:
        None. All results come from the helper calls.
    """
    outputHeader, splitNum, parallelNum = "readToContigHeader", 20, 20
    contigsFilename = "tmp" + inputContigsFilename
    readsFilename = "tmp" + inputReadsFilename

    targetToSourceContigsNamesDic = houseKeeperLib.transformFileHeaders(
        folderName, inputContigsFilename, contigsFilename, noAlignment)
    # The mapping returned for the reads file is never consulted below, so it
    # is not bound to a name. The call itself is kept: later steps are given
    # readsFilename, which this call presumably creates (confirm in
    # houseKeeperLib).
    houseKeeperLib.transformFileHeaders(
        folderName, inputReadsFilename, readsFilename, noAlignment)

    dataList = alignmentLib.extractRead2Contig(
        folderName, mummerLink, readsFilename, contigsFilename,
        splitNum, outputHeader, parallelNum, noAlignment)

    connectingReadsList = readConnectivityLib.findConnectingReadsList(dataList)

    spanReadsList, contigGapReadLookUpDic = readConnectivityLib.findSpanReadsList(connectingReadsList)

    contigsNamesList = alignmentLib.findContigsNames(folderName, contigsFilename)

    G = graphLib.formContigGraph(spanReadsList, contigsNamesList)

    condenseCandidatesList = G.findCondenseCandidatesList()

    multiplicityDic = G.findEdgeMultiplicity()

    potentialMergesList = setCoverLib.extendConnectivityFromReads(
        condenseCandidatesList, connectingReadsList, contigsNamesList,
        setCoverOption, multiplicityDic)

    # The strict comparison is kept on purpose: switching to plain truthiness
    # would change which non-bool values select the header-based branch.
    if useSpades == True:
        cTestLib.assignCoverageFromHeader(G, folderName, contigsFilename, targetToSourceContigsNamesDic)
    else:
        cTestLib.assignCoverageFromDataList(G, dataList, folderName, contigsFilename)

    scoreStructList = cTestLib.calculateConfidenceScore(G, potentialMergesList)

    scoreListWithDummy, dummyNodeDataRobot = setCoverLib.assignRepeatedNodesToDummy(scoreStructList)

    rankingLib.rankAndMerge(
        folderName, contigsNamesList, contigsFilename, readsFilename,
        scoreListWithDummy, contigGapReadLookUpDic, mScoreThres,
        conScoreThres, scoreListOutputName, outputContigsFilename,
        dummyNodeDataRobot)
Пример #2
0
def mainFlow(folderName, mummerLink, inputContigsFilename, inputReadsFilename,
             useSpades, noAlignment, scoreListOutputName,
             outputContigsFilename, mScoreThres, conScoreThres,
             setCoverOption):
    """Run the read-to-contig merge pipeline from header transformation to ranking.

    Steps, in order: transform the headers of both input files into
    "tmp"-prefixed working files, extract read-to-contig alignment data, find
    connecting and spanning reads, build the contig graph, derive potential
    merges, assign coverage to the graph, score the merges, and hand the
    result to ``rankingLib.rankAndMerge``.

    Args:
        folderName: Forwarded to every helper that touches files; presumably
            the working directory (confirm against the helper libraries).
        mummerLink: Forwarded to ``alignmentLib.extractRead2Contig``.
        inputContigsFilename: Contigs file name; "tmp" is prepended to form
            the working file name.
        inputReadsFilename: Reads file name; "tmp" is prepended to form the
            working file name.
        useSpades: When this compares equal to ``True`` coverage is assigned
            with ``cTestLib.assignCoverageFromHeader``; otherwise with
            ``cTestLib.assignCoverageFromDataList``.
        noAlignment: Forwarded to ``houseKeeperLib.transformFileHeaders`` and
            ``alignmentLib.extractRead2Contig``.
        scoreListOutputName: Forwarded to ``rankingLib.rankAndMerge``.
        outputContigsFilename: Forwarded to ``rankingLib.rankAndMerge``.
        mScoreThres: Forwarded to ``rankingLib.rankAndMerge``.
        conScoreThres: Forwarded to ``rankingLib.rankAndMerge``.
        setCoverOption: Forwarded to
            ``setCoverLib.extendConnectivityFromReads``.

    Returns:
        None. All results come from the helper calls.
    """
    outputHeader, splitNum, parallelNum = "readToContigHeader", 20, 20
    contigsFilename = "tmp" + inputContigsFilename
    readsFilename = "tmp" + inputReadsFilename

    targetToSourceContigsNamesDic = houseKeeperLib.transformFileHeaders(
        folderName, inputContigsFilename, contigsFilename, noAlignment)
    # The mapping returned for the reads file is never consulted below, so it
    # is not bound to a name. The call itself is kept: later steps are given
    # readsFilename, which this call presumably creates (confirm in
    # houseKeeperLib).
    houseKeeperLib.transformFileHeaders(
        folderName, inputReadsFilename, readsFilename, noAlignment)

    dataList = alignmentLib.extractRead2Contig(folderName, mummerLink,
                                               readsFilename, contigsFilename,
                                               splitNum, outputHeader,
                                               parallelNum, noAlignment)

    connectingReadsList = readConnectivityLib.findConnectingReadsList(dataList)

    spanReadsList, contigGapReadLookUpDic = readConnectivityLib.findSpanReadsList(
        connectingReadsList)

    contigsNamesList = alignmentLib.findContigsNames(folderName,
                                                     contigsFilename)

    G = graphLib.formContigGraph(spanReadsList, contigsNamesList)

    condenseCandidatesList = G.findCondenseCandidatesList()

    multiplicityDic = G.findEdgeMultiplicity()

    potentialMergesList = setCoverLib.extendConnectivityFromReads(
        condenseCandidatesList, connectingReadsList, contigsNamesList,
        setCoverOption, multiplicityDic)

    # The strict comparison is kept on purpose: switching to plain truthiness
    # would change which non-bool values select the header-based branch.
    if useSpades == True:
        cTestLib.assignCoverageFromHeader(G, folderName, contigsFilename,
                                          targetToSourceContigsNamesDic)
    else:
        cTestLib.assignCoverageFromDataList(G, dataList, folderName,
                                            contigsFilename)

    scoreStructList = cTestLib.calculateConfidenceScore(G, potentialMergesList)

    scoreListWithDummy, dummyNodeDataRobot = setCoverLib.assignRepeatedNodesToDummy(
        scoreStructList)

    rankingLib.rankAndMerge(folderName, contigsNamesList, contigsFilename,
                            readsFilename, scoreListWithDummy,
                            contigGapReadLookUpDic, mScoreThres, conScoreThres,
                            scoreListOutputName, outputContigsFilename,
                            dummyNodeDataRobot)
Пример #3
0
 def test_findConnectingReadsListEmbed(self):
     """A single alignment row must yield one ['ReadDummy', 'B', 'ContigDummy', row] entry."""
     alignmentRow = [1, 200, 201, 400, 200, 200, 100.0, 200, 800, 'ContigDummy', 'ReadDummy']
     # The expectation holds an independent copy taken before the call, so a
     # library that mutated its input could not make the comparison pass.
     expected = [['ReadDummy', 'B', 'ContigDummy', list(alignmentRow)]]
     actual = readConnectivityLib.findConnectingReadsList([alignmentRow])
     assert actual == expected
Пример #4
0
 def test_findConnectingReadsListEmbed(self):
     """One alignment row should come back as a single [read, "B", contig, row] record."""
     contigLabel, readLabel = "ContigDummy", "ReadDummy"
     row = [1, 200, 201, 400, 200, 200, 100.0, 200, 800, contigLabel, readLabel]
     # Snapshot the row before the call so the expectation never aliases the
     # list handed to the library.
     expected = [[readLabel, "B", contigLabel, row[:]]]
     assert readConnectivityLib.findConnectingReadsList([row]) == expected