def mainFlow(
    folderName,
    mummerLink,
    inputContigsFilename,
    inputReadsFilename,
    useSpades,
    noAlignment,
    scoreListOutputName,
    outputContigsFilename,
    mScoreThres,
    conScoreThres,
    setCoverOption,
):
    """Drive the whole flow from the input files to ranked, merged contigs.

    The function is a fixed sequence of library calls; each step feeds the
    next:

    1. ``houseKeeperLib.transformFileHeaders`` is run on the contigs file and
       on the reads file, targeting ``"tmp"``-prefixed file names.
    2. ``alignmentLib.extractRead2Contig`` produces ``dataList``.
    3. ``readConnectivityLib`` derives connecting reads and span reads.
    4. ``graphLib.formContigGraph`` builds the graph, from which condense
       candidates and edge multiplicities are taken.
    5. ``setCoverLib.extendConnectivityFromReads`` yields potential merges.
    6. Coverage is assigned on the graph, from headers when
       ``useSpades == True`` and from ``dataList`` otherwise.
    7. ``cTestLib.calculateConfidenceScore`` scores the merges,
       ``setCoverLib.assignRepeatedNodesToDummy`` post-processes the scores,
       and ``rankingLib.rankAndMerge`` receives everything, including the
       thresholds and the output file names.

    Args:
        folderName: Passed to every file-touching helper.
        mummerLink: Forwarded to ``alignmentLib.extractRead2Contig``.
        inputContigsFilename: Contigs file name; ``"tmp"`` is prepended to
            form the working file name.
        inputReadsFilename: Reads file name; ``"tmp"`` is prepended to form
            the working file name.
        useSpades: Selects the coverage source (compared with ``== True``).
        noAlignment: Forwarded to the header transform and alignment calls.
        scoreListOutputName: Forwarded to ``rankingLib.rankAndMerge``.
        outputContigsFilename: Forwarded to ``rankingLib.rankAndMerge``.
        mScoreThres: Forwarded to ``rankingLib.rankAndMerge``.
        conScoreThres: Forwarded to ``rankingLib.rankAndMerge``.
        setCoverOption: Forwarded to
            ``setCoverLib.extendConnectivityFromReads``.

    Returns:
        None. Nothing is returned by this function itself.
    """
    # Fixed settings handed to the alignment step.
    alignHeader = "readToContigHeader"
    numSplits = 20
    numParallel = 20

    tmpContigsName = "tmp" + inputContigsFilename
    tmpReadsName = "tmp" + inputReadsFilename

    contigNameMap = houseKeeperLib.transformFileHeaders(
        folderName, inputContigsFilename, tmpContigsName, noAlignment
    )
    # The returned mapping for reads is not consumed below; the call is kept.
    readNameMap = houseKeeperLib.transformFileHeaders(
        folderName, inputReadsFilename, tmpReadsName, noAlignment
    )

    alignments = alignmentLib.extractRead2Contig(
        folderName,
        mummerLink,
        tmpReadsName,
        tmpContigsName,
        numSplits,
        alignHeader,
        numParallel,
        noAlignment,
    )

    linkingReads = readConnectivityLib.findConnectingReadsList(alignments)
    spanResult = readConnectivityLib.findSpanReadsList(linkingReads)
    spanningReads, gapReadLookup = spanResult

    contigNames = alignmentLib.findContigsNames(folderName, tmpContigsName)

    graph = graphLib.formContigGraph(spanningReads, contigNames)
    candidates = graph.findCondenseCandidatesList()
    edgeMultiplicity = graph.findEdgeMultiplicity()

    mergeCandidates = setCoverLib.extendConnectivityFromReads(
        candidates, linkingReads, contigNames, setCoverOption, edgeMultiplicity
    )

    # Exact equality with True is intentional: it mirrors the existing contract.
    if useSpades == True:
        cTestLib.assignCoverageFromHeader(
            graph, folderName, tmpContigsName, contigNameMap
        )
    else:
        cTestLib.assignCoverageFromDataList(
            graph, alignments, folderName, tmpContigsName
        )

    scored = cTestLib.calculateConfidenceScore(graph, mergeCandidates)
    dummyResult = setCoverLib.assignRepeatedNodesToDummy(scored)
    scoredWithDummy, dummyRobot = dummyResult

    rankingLib.rankAndMerge(
        folderName,
        contigNames,
        tmpContigsName,
        tmpReadsName,
        scoredWithDummy,
        gapReadLookup,
        mScoreThres,
        conScoreThres,
        scoreListOutputName,
        outputContigsFilename,
        dummyRobot,
    )
def test_findCondenseCandidatesList(self):
    """One spanning read between two contigs yields one condense candidate.

    The graph is built from a single span read linking ``ContigDummyL_p`` to
    ``ContigDummyR_p``; the candidate list must then equal exactly one entry
    pairing ``"ContigDummyL_R~ContigDummyR_L~1"`` with ``False``.
    """
    spanReads = [["ContigDummyL_p", "ContigDummyR_p", "ReadDummy"]]
    contigNames = ["ContigDummyL", "ContigDummyR"]

    graph = graphLib.formContigGraph(spanReads, contigNames)
    candidates = graph.findCondenseCandidatesList()

    expected = [[["ContigDummyL_R~ContigDummyR_L~1"], False]]
    assert candidates == expected
def test_formContigGraph(self):
    """A single spanning read connects L's right end to R's left end only.

    After building the graph from one span read
    (``ContigDummyL_p`` -> ``ContigDummyR_p``), the outer ends of both
    contig nodes must have no connections and the inner ends exactly one.
    """
    spanReads = [["ContigDummyL_p", "ContigDummyR_p", "ReadDummy"]]
    contigNames = ["ContigDummyL", "ContigDummyR"]

    graph = graphLib.formContigGraph(spanReads, contigNames)
    nodeL = graph.dicOfContigNodes["ContigDummyL"]
    nodeR = graph.dicOfContigNodes["ContigDummyR"]

    # Left contig: nothing on its left end, one link on its right end.
    assert len(nodeL.leftEndContainer.connectedContigsDic) == 0
    assert len(nodeL.rightEndContainer.connectedContigsDic) == 1
    # Right contig: one link on its left end, nothing on its right end.
    assert len(nodeR.leftEndContainer.connectedContigsDic) == 1
    assert len(nodeR.rightEndContainer.connectedContigsDic) == 0
def mainFlow(
    folderName,
    mummerLink,
    inputContigsFilename,
    inputReadsFilename,
    useSpades,
    noAlignment,
    scoreListOutputName,
    outputContigsFilename,
    mScoreThres,
    conScoreThres,
    setCoverOption,
):
    """Run every stage in order: header transform, alignment, graph, scoring, merge.

    Working copies of the two inputs are named by prepending ``"tmp"`` to the
    given file names. Those names, together with ``folderName``, are threaded
    through the helper libraries as follows:

    * ``houseKeeperLib.transformFileHeaders`` (contigs, then reads)
    * ``alignmentLib.extractRead2Contig`` -> ``dataList``
    * ``readConnectivityLib.findConnectingReadsList`` / ``findSpanReadsList``
    * ``alignmentLib.findContigsNames``
    * ``graphLib.formContigGraph`` and the graph's
      ``findCondenseCandidatesList`` / ``findEdgeMultiplicity``
    * ``setCoverLib.extendConnectivityFromReads``
    * ``cTestLib.assignCoverageFromHeader`` when ``useSpades == True``,
      otherwise ``cTestLib.assignCoverageFromDataList``
    * ``cTestLib.calculateConfidenceScore``
    * ``setCoverLib.assignRepeatedNodesToDummy``
    * ``rankingLib.rankAndMerge``

    Args:
        folderName: Handed to each helper that takes a folder argument.
        mummerLink: Handed to the alignment step.
        inputContigsFilename: Name of the contigs input file.
        inputReadsFilename: Name of the reads input file.
        useSpades: Chooses which coverage-assignment helper is called.
        noAlignment: Handed to the header-transform and alignment steps.
        scoreListOutputName: Handed to the final rank-and-merge step.
        outputContigsFilename: Handed to the final rank-and-merge step.
        mScoreThres: Handed to the final rank-and-merge step.
        conScoreThres: Handed to the final rank-and-merge step.
        setCoverOption: Handed to the connectivity-extension step.

    Returns:
        None; the function has no return statement.
    """
    headerTag, splitCount, parallelCount = "readToContigHeader", 20, 20

    workContigs = "tmp" + inputContigsFilename
    workReads = "tmp" + inputReadsFilename

    sourceContigNames = houseKeeperLib.transformFileHeaders(
        folderName, inputContigsFilename, workContigs, noAlignment
    )
    # Result is not read afterwards; the call itself is retained.
    sourceReadNames = houseKeeperLib.transformFileHeaders(
        folderName, inputReadsFilename, workReads, noAlignment
    )

    read2Contig = alignmentLib.extractRead2Contig(
        folderName,
        mummerLink,
        workReads,
        workContigs,
        splitCount,
        headerTag,
        parallelCount,
        noAlignment,
    )

    connectingReads = readConnectivityLib.findConnectingReadsList(read2Contig)
    spanReads, gapLookup = readConnectivityLib.findSpanReadsList(connectingReads)
    namesOfContigs = alignmentLib.findContigsNames(folderName, workContigs)

    contigGraph = graphLib.formContigGraph(spanReads, namesOfContigs)
    condenseCandidates = contigGraph.findCondenseCandidatesList()
    multiplicity = contigGraph.findEdgeMultiplicity()

    potentialMerges = setCoverLib.extendConnectivityFromReads(
        condenseCandidates,
        connectingReads,
        namesOfContigs,
        setCoverOption,
        multiplicity,
    )

    # Kept as an explicit comparison with True to match existing behaviour.
    if useSpades == True:
        cTestLib.assignCoverageFromHeader(
            contigGraph, folderName, workContigs, sourceContigNames
        )
    else:
        cTestLib.assignCoverageFromDataList(
            contigGraph, read2Contig, folderName, workContigs
        )

    scoreStructs = cTestLib.calculateConfidenceScore(contigGraph, potentialMerges)
    scoresWithDummy, dummyData = setCoverLib.assignRepeatedNodesToDummy(scoreStructs)

    rankingLib.rankAndMerge(
        folderName,
        namesOfContigs,
        workContigs,
        workReads,
        scoresWithDummy,
        gapLookup,
        mScoreThres,
        conScoreThres,
        scoreListOutputName,
        outputContigsFilename,
        dummyData,
    )
def test_formContigGraph(self):
    """Check end-to-end connectivity counts after forming a two-contig graph.

    Input is one span read from ``ContigDummyL_p`` to ``ContigDummyR_p``.
    Expected connection counts per contig end: L.left 0, L.right 1,
    R.left 1, R.right 0.
    """
    readsSpanning = [["ContigDummyL_p", "ContigDummyR_p", "ReadDummy"]]
    namesList = ["ContigDummyL", "ContigDummyR"]
    built = graphLib.formContigGraph(readsSpanning, namesList)

    leftContig = built.dicOfContigNodes["ContigDummyL"]
    rightContig = built.dicOfContigNodes["ContigDummyR"]

    leftOuter = len(leftContig.leftEndContainer.connectedContigsDic)
    assert leftOuter == 0
    leftInner = len(leftContig.rightEndContainer.connectedContigsDic)
    assert leftInner == 1
    rightInner = len(rightContig.leftEndContainer.connectedContigsDic)
    assert rightInner == 1
    rightOuter = len(rightContig.rightEndContainer.connectedContigsDic)
    assert rightOuter == 0
def test_assignCoverageFromDataList(self):
    """Coverage assignment records contig lengths and per-contig read counts.

    A two-record FASTA file (``AAACCC`` and ``CCCTTTT``) is written to
    ``self.folderName + self.contigsFilename``. With a single ``dataList``
    row naming ``ContigDummyL`` and ``ReadDummy``, the graph nodes must end
    up with lengths 6 and 7 and read counts 1 and 0 respectively.
    """
    alignmentRows = [[1, 6, 1, 6, 6, 6, 100.0, 6, 6, "ContigDummyL", "ReadDummy"]]

    # Write the fixture FASTA before the graph is built, as the coverage
    # helper is given the same folder and file name.
    records = [
        SeqRecord(Seq("AAACCC", generic_dna), id="ContigDummyL", description=""),
        SeqRecord(Seq("CCCTTTT", generic_dna), id="ContigDummyR", description=""),
    ]
    SeqIO.write(records, self.folderName + self.contigsFilename, "fasta")

    spanReads = [["ContigDummyL_p", "ContigDummyR_p", "ReadDummy"]]
    contigNames = ["ContigDummyL", "ContigDummyR"]
    graph = graphLib.formContigGraph(spanReads, contigNames)

    cTestLib.assignCoverageFromDataList(
        graph, alignmentRows, self.folderName, self.contigsFilename
    )

    assert graph.dicOfContigNodes["ContigDummyL"].contigLength == 6
    assert graph.dicOfContigNodes["ContigDummyR"].contigLength == 7
    assert graph.dicOfContigNodes["ContigDummyL"].readToContigCount == 1
    assert graph.dicOfContigNodes["ContigDummyR"].readToContigCount == 0
def test_calculateConfidenceScore(self):
    """Confidence scoring of one candidate merge returns the expected struct.

    Setup: a two-record FASTA fixture is written, a graph is formed from one
    span read, and coverage is assigned from a single ``dataList`` row.
    The first score entry must carry the candidate name, a score within 0.01
    of 0.53846153846153844, a third field equal to 1, and a trailing flag
    equal to ``False``.
    """
    candidates = [[["ContigDummyL_R~ContigDummyR_L~1"], False]]
    spanReads = [["ContigDummyL_p", "ContigDummyR_p", "ReadDummy"]]
    contigNames = ["ContigDummyL", "ContigDummyR"]
    alignmentRows = [[1, 6, 1, 6, 6, 6, 100.0, 6, 6, "ContigDummyL", "ReadDummy"]]

    records = [
        SeqRecord(Seq("AAACCC", generic_dna), id="ContigDummyL", description=""),
        SeqRecord(Seq("CCCTTTT", generic_dna), id="ContigDummyR", description=""),
    ]
    SeqIO.write(records, self.folderName + self.contigsFilename, "fasta")

    graph = graphLib.formContigGraph(spanReads, contigNames)
    cTestLib.assignCoverageFromDataList(
        graph, alignmentRows, self.folderName, self.contigsFilename
    )
    scores = cTestLib.calculateConfidenceScore(graph, candidates)

    firstEntry = scores[0]
    firstMerge = firstEntry[0][0]
    assert firstMerge[0] == "ContigDummyL_R~ContigDummyR_L~1"
    assert abs(firstMerge[1] - 0.53846153846153844) < 0.01
    assert firstMerge[2] == 1
    assert firstEntry[1] == False
def test_assignCoverageFromDataList(self):
    """Verify contig length and read count fields after coverage assignment.

    Fixture: FASTA records ``ContigDummyL`` (``AAACCC``) and ``ContigDummyR``
    (``CCCTTTT``) written to ``self.folderName + self.contigsFilename``, plus
    one ``dataList`` row that names ``ContigDummyL``. Expected afterwards:
    ``contigLength`` 6 / 7 and ``readToContigCount`` 1 / 0 for L / R.
    """
    rows = [[1, 6, 1, 6, 6, 6, 100.0, 6, 6, "ContigDummyL", "ReadDummy"]]

    fastaRecords = []
    for seqText, seqId in (("AAACCC", "ContigDummyL"), ("CCCTTTT", "ContigDummyR")):
        fastaRecords.append(SeqRecord(Seq(seqText, generic_dna), id=seqId, description=""))
    fastaPath = self.folderName + self.contigsFilename
    SeqIO.write(fastaRecords, fastaPath, "fasta")

    readsSpanning = [["ContigDummyL_p", "ContigDummyR_p", "ReadDummy"]]
    namesList = ["ContigDummyL", "ContigDummyR"]
    built = graphLib.formContigGraph(readsSpanning, namesList)
    cTestLib.assignCoverageFromDataList(built, rows, self.folderName, self.contigsFilename)

    nodes = built.dicOfContigNodes
    assert nodes["ContigDummyL"].contigLength == 6
    assert nodes["ContigDummyR"].contigLength == 7
    assert nodes["ContigDummyL"].readToContigCount == 1
    assert nodes["ContigDummyR"].readToContigCount == 0
def test_calculateConfidenceScore(self):
    """Score a single candidate merge and check each field of the result.

    The graph comes from one span read between the two dummy contigs, and
    coverage is assigned from one ``dataList`` row after a FASTA fixture is
    written. The returned structure is checked field by field: name, score
    (tolerance 0.01 around 0.53846153846153844), a value of 1, and a
    ``False`` flag.
    """
    mergeCandidates = [[["ContigDummyL_R~ContigDummyR_L~1"], False]]
    readsSpanning = [["ContigDummyL_p", "ContigDummyR_p", "ReadDummy"]]
    namesList = ["ContigDummyL", "ContigDummyR"]
    rows = [[1, 6, 1, 6, 6, 6, 100.0, 6, 6, "ContigDummyL", "ReadDummy"]]

    fastaRecords = []
    for seqText, seqId in (("AAACCC", "ContigDummyL"), ("CCCTTTT", "ContigDummyR")):
        fastaRecords.append(SeqRecord(Seq(seqText, generic_dna), id=seqId, description=""))
    fastaPath = self.folderName + self.contigsFilename
    SeqIO.write(fastaRecords, fastaPath, "fasta")

    built = graphLib.formContigGraph(readsSpanning, namesList)
    cTestLib.assignCoverageFromDataList(built, rows, self.folderName, self.contigsFilename)
    result = cTestLib.calculateConfidenceScore(built, mergeCandidates)

    head = result[0]
    mergeName, mergeScore, mergeCount = head[0][0][0], head[0][0][1], head[0][0][2]
    assert mergeName == "ContigDummyL_R~ContigDummyR_L~1"
    assert abs(mergeScore - 0.53846153846153844) < 0.01
    assert mergeCount == 1
    assert head[1] == False
def test_findCondenseCandidatesList(self):
    """Condense candidates for a two-contig graph joined by one span read.

    Expects ``findCondenseCandidatesList`` to return a single candidate whose
    name list is ``["ContigDummyL_R~ContigDummyR_L~1"]`` and whose flag is
    ``False``.
    """
    readsSpanning = [["ContigDummyL_p", "ContigDummyR_p", "ReadDummy"]]
    namesList = ["ContigDummyL", "ContigDummyR"]
    built = graphLib.formContigGraph(readsSpanning, namesList)

    found = built.findCondenseCandidatesList()
    assert found == [[["ContigDummyL_R~ContigDummyR_L~1"], False]]