def mainFlow(
    folderName,
    mummerLink,
    inputContigsFilename,
    inputReadsFilename,
    useSpades,
    noAlignment,
    scoreListOutputName,
    outputContigsFilename,
    mScoreThres,
    conScoreThres,
    setCoverOption,
):
    """Drive the full flow: align reads to contigs, score merges, rank and merge.

    Steps, in call order:
      1. Rewrite the headers of the contigs and reads files into "tmp"-prefixed
         copies via ``houseKeeperLib.transformFileHeaders``.
      2. Obtain read-to-contig alignment data (``alignmentLib.extractRead2Contig``).
      3. Derive connecting reads and span reads (``readConnectivityLib``).
      4. Build the contig graph and collect condense candidates and edge
         multiplicities (``graphLib``).
      5. Extend connectivity into a list of potential merges (``setCoverLib``).
      6. Assign coverage to the graph, from headers or from the alignment data,
         depending on ``useSpades``.
      7. Compute confidence scores, assign repeated nodes to dummies, then call
         ``rankingLib.rankAndMerge``.

    Returns:
        None. Results are produced by the library calls (presumably written
        under ``folderName`` using ``scoreListOutputName`` and
        ``outputContigsFilename`` -- confirm against ``rankingLib``).
    """
    # Fixed settings handed to the aligner.
    alignmentHeader = "readToContigHeader"
    numSplits = 20
    numParallel = 20

    # Working copies are named by prefixing the input file names with "tmp".
    contigsFilename = "tmp" + inputContigsFilename
    readsFilename = "tmp" + inputReadsFilename

    contigNameMap = houseKeeperLib.transformFileHeaders(
        folderName, inputContigsFilename, contigsFilename, noAlignment
    )
    # The returned mapping for reads is not used below; the call itself is kept.
    houseKeeperLib.transformFileHeaders(
        folderName, inputReadsFilename, readsFilename, noAlignment
    )

    alignmentData = alignmentLib.extractRead2Contig(
        folderName,
        mummerLink,
        readsFilename,
        contigsFilename,
        numSplits,
        alignmentHeader,
        numParallel,
        noAlignment,
    )

    connectingReads = readConnectivityLib.findConnectingReadsList(alignmentData)
    spanReads, gapReadLookup = readConnectivityLib.findSpanReadsList(connectingReads)

    contigNames = alignmentLib.findContigsNames(folderName, contigsFilename)
    contigGraph = graphLib.formContigGraph(spanReads, contigNames)
    condenseCandidates = contigGraph.findCondenseCandidatesList()
    edgeMultiplicity = contigGraph.findEdgeMultiplicity()

    potentialMerges = setCoverLib.extendConnectivityFromReads(
        condenseCandidates,
        connectingReads,
        contigNames,
        setCoverOption,
        edgeMultiplicity,
    )

    # Explicit comparison kept on purpose: only values equal to True take the
    # header-based coverage path; everything else uses the alignment data.
    if not (useSpades == True):
        cTestLib.assignCoverageFromDataList(
            contigGraph, alignmentData, folderName, contigsFilename
        )
    else:
        cTestLib.assignCoverageFromHeader(
            contigGraph, folderName, contigsFilename, contigNameMap
        )

    scoreStructs = cTestLib.calculateConfidenceScore(contigGraph, potentialMerges)
    scoresWithDummy, dummyNodeData = setCoverLib.assignRepeatedNodesToDummy(scoreStructs)

    rankingLib.rankAndMerge(
        folderName,
        contigNames,
        contigsFilename,
        readsFilename,
        scoresWithDummy,
        gapReadLookup,
        mScoreThres,
        conScoreThres,
        scoreListOutputName,
        outputContigsFilename,
        dummyNodeData,
    )
def test_findSpanReadsListEmbed(self):
    """Check findSpanReadsList for one read tagged L, B, B, R across four contigs.

    The read "ReadDummy" is linked to ContigDummyL ("L"), ContigDummyB1 and
    ContigDummyB2 (both "B"), and ContigDummyR ("R"). Three span entries and
    three lookup entries are expected, one per consecutive contig pair.

    Comparisons are order-insensitive. The previous form
    ``a.sort() == b.sort()`` compared ``None == None`` and could never fail.
    """
    contigDummyLRecord = [601, 800, 1, 200, 200, 200, 100.0, 800, 400, "ContigDummyL", "ReadDummy"]
    contigDummyBRecord1 = [1, 200, 101, 300, 200, 200, 100.0, 200, 400, "ContigDummyB1", "ReadDummy"]
    contigDummyBRecord2 = [1, 200, 350, 151, 200, 200, 100.0, 200, 400, "ContigDummyB2", "ReadDummy"]
    contigDummyRRecord = [1, 200, 201, 400, 200, 200, 100.0, 800, 400, "ContigDummyR", "ReadDummy"]

    connectingReadsList = [
        ["ReadDummy", "L", "ContigDummyL", contigDummyLRecord],
        ["ReadDummy", "B", "ContigDummyB1", contigDummyBRecord1],
        ["ReadDummy", "B", "ContigDummyB2", contigDummyBRecord2],
        ["ReadDummy", "R", "ContigDummyR", contigDummyRRecord],
    ]

    spanReadsList, contigGapReadLookUpDic = readConnectivityLib.findSpanReadsList(connectingReadsList)

    expectedSpanReadsList = [
        ["ContigDummyL_p", "ContigDummyB1_p", "ReadDummy"],
        ["ContigDummyB1_p", "ContigDummyB2_d", "ReadDummy"],
        ["ContigDummyB2_d", "ContigDummyR_p", "ReadDummy"],
    ]
    # sorted() returns a new list; list.sort() sorts in place and returns None.
    assert sorted(spanReadsList) == sorted(expectedSpanReadsList)

    assert len(contigGapReadLookUpDic) == 3

    expectedGapRecords = {
        "ContigDummyL_p-ContigDummyB1_p": [[contigDummyLRecord, contigDummyBRecord1]],
        "ContigDummyB1_p-ContigDummyB2_d": [[contigDummyBRecord1, contigDummyBRecord2]],
        "ContigDummyB2_d-ContigDummyR_p": [[contigDummyBRecord2, contigDummyRRecord]],
    }
    for gapKey, expectedRecords in expectedGapRecords.items():
        assert sorted(contigGapReadLookUpDic[gapKey]) == sorted(expectedRecords)
def test_findSpanReadsList(self):
    """Check findSpanReadsList for one read linking an "L" contig to an "R" contig.

    A single span entry and a single lookup entry are expected.

    The lookup comparison is order-insensitive. The previous form
    ``a.sort() == b.sort()`` compared ``None == None`` and could never fail.
    """
    contigDummyLRecord = [601, 800, 1, 200, 200, 200, 100.0, 800, 400, 'ContigDummyL', 'ReadDummy']
    contigDummyRRecord = [1, 200, 201, 400, 200, 200, 100.0, 800, 400, 'ContigDummyR', 'ReadDummy']

    connectingReadsList = [
        ['ReadDummy', 'L', 'ContigDummyL', contigDummyLRecord],
        ['ReadDummy', 'R', 'ContigDummyR', contigDummyRRecord],
    ]

    spanReadsList, contigGapReadLookUpDic = readConnectivityLib.findSpanReadsList(connectingReadsList)

    assert spanReadsList == [['ContigDummyL_p', 'ContigDummyR_p', 'ReadDummy']]
    assert len(contigGapReadLookUpDic) == 1

    # sorted() returns a new list; list.sort() sorts in place and returns None.
    actualRecords = contigGapReadLookUpDic['ContigDummyL_p-ContigDummyR_p']
    expectedRecords = [[contigDummyLRecord, contigDummyRRecord]]
    assert sorted(actualRecords) == sorted(expectedRecords)
def mainFlow(folderName, mummerLink, inputContigsFilename, inputReadsFilename,
             useSpades, noAlignment, scoreListOutputName, outputContigsFilename,
             mScoreThres, conScoreThres, setCoverOption):
    """Run header rewriting, alignment, graph construction, scoring and merging.

    The function chains the project libraries in a fixed order:
    ``houseKeeperLib`` (header transformation into "tmp"-prefixed files),
    ``alignmentLib`` (read-to-contig data and contig names),
    ``readConnectivityLib`` (connecting and span reads), ``graphLib`` (contig
    graph), ``setCoverLib`` (potential merges and dummy assignment),
    ``cTestLib`` (coverage and confidence scores) and finally
    ``rankingLib.rankAndMerge``.

    ``useSpades`` selects the coverage source: header-based when it equals
    True, otherwise taken from the alignment data. Nothing is returned.
    """
    # Temporary file names: the inputs with a "tmp" prefix.
    tmpContigs = "tmp" + inputContigsFilename
    tmpReads = "tmp" + inputReadsFilename

    # Aligner settings: (split count, output header, parallel count).
    splitCount, headerName, workerCount = 20, "readToContigHeader", 20

    sourceNamesOfContigs = houseKeeperLib.transformFileHeaders(
        folderName, inputContigsFilename, tmpContigs, noAlignment)
    # Result for the reads file is never consulted afterwards; only the call matters here.
    houseKeeperLib.transformFileHeaders(
        folderName, inputReadsFilename, tmpReads, noAlignment)

    read2Contig = alignmentLib.extractRead2Contig(
        folderName, mummerLink, tmpReads, tmpContigs,
        splitCount, headerName, workerCount, noAlignment)

    linkReads = readConnectivityLib.findConnectingReadsList(read2Contig)
    spanResult = readConnectivityLib.findSpanReadsList(linkReads)
    # Two-element unpacking, as in the original tuple assignment.
    spanningReads, gapLookup = spanResult

    namesOfContigs = alignmentLib.findContigsNames(folderName, tmpContigs)

    graph = graphLib.formContigGraph(spanningReads, namesOfContigs)
    candidates = graph.findCondenseCandidatesList()
    multiplicities = graph.findEdgeMultiplicity()

    mergeCandidates = setCoverLib.extendConnectivityFromReads(
        candidates, linkReads, namesOfContigs, setCoverOption, multiplicities)

    # Equality test (not truthiness) is retained so the branch choice is unchanged.
    headerCoverage = (useSpades == True)
    if headerCoverage:
        cTestLib.assignCoverageFromHeader(
            graph, folderName, tmpContigs, sourceNamesOfContigs)
    else:
        cTestLib.assignCoverageFromDataList(
            graph, read2Contig, folderName, tmpContigs)

    confidenceScores = cTestLib.calculateConfidenceScore(graph, mergeCandidates)
    dummyResult = setCoverLib.assignRepeatedNodesToDummy(confidenceScores)
    scoredWithDummy, dummyRobot = dummyResult

    rankingLib.rankAndMerge(
        folderName, namesOfContigs, tmpContigs, tmpReads,
        scoredWithDummy, gapLookup, mScoreThres, conScoreThres,
        scoreListOutputName, outputContigsFilename, dummyRobot)
def test_findSpanReadsList(self):
    """Verify the span list and gap lookup for a read joining one L and one R contig.

    Expected: exactly one span entry naming ContigDummyL_p and ContigDummyR_p,
    and exactly one lookup entry holding the pair of alignment records.

    The lookup check used to be ``x.sort() == y.sort()``, which is always
    True because ``list.sort()`` returns None. It now compares sorted copies.
    """
    contigDummyLRecord = [601, 800, 1, 200, 200, 200, 100.0, 800, 400, "ContigDummyL", "ReadDummy"]
    contigDummyRRecord = [1, 200, 201, 400, 200, 200, 100.0, 800, 400, "ContigDummyR", "ReadDummy"]

    connectingReadsList = []
    for side, contigName, record in (
        ("L", "ContigDummyL", contigDummyLRecord),
        ("R", "ContigDummyR", contigDummyRRecord),
    ):
        connectingReadsList.append(["ReadDummy", side, contigName, record])

    spanReadsList, contigGapReadLookUpDic = readConnectivityLib.findSpanReadsList(connectingReadsList)

    assert spanReadsList == [["ContigDummyL_p", "ContigDummyR_p", "ReadDummy"]]
    assert len(contigGapReadLookUpDic) == 1
    # Order-insensitive comparison of the recorded alignment pairs.
    assert sorted(contigGapReadLookUpDic["ContigDummyL_p-ContigDummyR_p"]) == sorted(
        [[contigDummyLRecord, contigDummyRRecord]]
    )
def test_findSpanReadsListEmbed(self):
    """Verify span reads and gap lookup when two "B" contigs sit between L and R.

    Input: one read ("ReadDummy") with four connecting entries tagged
    L, B, B, R. Expected: three span entries and three lookup keys, one for
    each consecutive pair of contigs.

    The order-insensitive checks used to be ``x.sort() == y.sort()``, which is
    always True because ``list.sort()`` returns None. They now compare sorted
    copies.
    """
    contigDummyLRecord = [601, 800, 1, 200, 200, 200, 100.0, 800, 400, 'ContigDummyL', 'ReadDummy']
    contigDummyBRecord1 = [1, 200, 101, 300, 200, 200, 100.0, 200, 400, 'ContigDummyB1', 'ReadDummy']
    contigDummyBRecord2 = [1, 200, 350, 151, 200, 200, 100.0, 200, 400, 'ContigDummyB2', 'ReadDummy']
    contigDummyRRecord = [1, 200, 201, 400, 200, 200, 100.0, 800, 400, 'ContigDummyR', 'ReadDummy']

    connectingReadsList = []
    connectingReadsList.append(['ReadDummy', 'L', 'ContigDummyL', contigDummyLRecord])
    connectingReadsList.append(['ReadDummy', 'B', 'ContigDummyB1', contigDummyBRecord1])
    connectingReadsList.append(['ReadDummy', 'B', 'ContigDummyB2', contigDummyBRecord2])
    connectingReadsList.append(['ReadDummy', 'R', 'ContigDummyR', contigDummyRRecord])

    spanReadsList, contigGapReadLookUpDic = readConnectivityLib.findSpanReadsList(connectingReadsList)

    expectedSpanReadsList = [
        ['ContigDummyL_p', 'ContigDummyB1_p', 'ReadDummy'],
        ['ContigDummyB1_p', 'ContigDummyB2_d', 'ReadDummy'],
        ['ContigDummyB2_d', 'ContigDummyR_p', 'ReadDummy'],
    ]

    assert sorted(spanReadsList) == sorted(expectedSpanReadsList)
    assert len(contigGapReadLookUpDic) == 3
    assert (sorted(contigGapReadLookUpDic['ContigDummyL_p-ContigDummyB1_p'])
            == sorted([[contigDummyLRecord, contigDummyBRecord1]]))
    assert (sorted(contigGapReadLookUpDic['ContigDummyB1_p-ContigDummyB2_d'])
            == sorted([[contigDummyBRecord1, contigDummyBRecord2]]))
    assert (sorted(contigGapReadLookUpDic['ContigDummyB2_d-ContigDummyR_p'])
            == sorted([[contigDummyBRecord2, contigDummyRRecord]]))