def mainFlow(folderName, mummerLink, inputContigsFilename, inputReadsFilename, useSpades, noAlignment,
             scoreListOutputName, outputContigsFilename, mScoreThres, conScoreThres, setCoverOption):
    """Drive the read-to-contig pipeline from header transformation to rankAndMerge.

    The steps run strictly in sequence; each one feeds the next:
    header transformation -> read/contig alignment extraction -> connecting and
    spanning reads -> contig graph -> candidate merges -> coverage assignment ->
    confidence scoring -> dummy-node assignment -> rank and merge.

    Args:
        folderName: Working folder handed to every library call that touches files.
        mummerLink: Forwarded to alignmentLib.extractRead2Contig.
        inputContigsFilename: Contigs file name; the working copy is named "tmp" + this.
        inputReadsFilename: Reads file name; the working copy is named "tmp" + this.
        useSpades: When equal to True, coverage is assigned from the contig headers;
            otherwise it is assigned from the alignment data list.
        noAlignment: Forwarded to transformFileHeaders and extractRead2Contig.
        scoreListOutputName: Forwarded to rankingLib.rankAndMerge.
        outputContigsFilename: Forwarded to rankingLib.rankAndMerge.
        mScoreThres: Forwarded to rankingLib.rankAndMerge.
        conScoreThres: Forwarded to rankingLib.rankAndMerge.
        setCoverOption: Forwarded to setCoverLib.extendConnectivityFromReads.
    """
    # Fixed settings forwarded to the alignment extraction step.
    alignmentOutputHeader = "readToContigHeader"
    numSplits = 20
    numParallel = 20

    # Working file names derived from the inputs.
    contigsFilename = "tmp" + inputContigsFilename
    readsFilename = "tmp" + inputReadsFilename

    targetToSourceContigsNamesDic = houseKeeperLib.transformFileHeaders(
        folderName, inputContigsFilename, contigsFilename, noAlignment)
    # The reads header map is not read again in this function; the call itself is kept.
    targetToSourceReadsNamesDic = houseKeeperLib.transformFileHeaders(
        folderName, inputReadsFilename, readsFilename, noAlignment)

    alignmentRecords = alignmentLib.extractRead2Contig(
        folderName, mummerLink, readsFilename, contigsFilename,
        numSplits, alignmentOutputHeader, numParallel, noAlignment)

    connectingReads = readConnectivityLib.findConnectingReadsList(alignmentRecords)
    spanningReads, gapReadLookup = readConnectivityLib.findSpanReadsList(connectingReads)

    contigNames = alignmentLib.findContigsNames(folderName, contigsFilename)
    contigGraph = graphLib.formContigGraph(spanningReads, contigNames)
    condenseCandidates = contigGraph.findCondenseCandidatesList()
    edgeMultiplicity = contigGraph.findEdgeMultiplicity()

    candidateMerges = setCoverLib.extendConnectivityFromReads(
        condenseCandidates, connectingReads, contigNames, setCoverOption, edgeMultiplicity)

    # Explicit comparison with True is kept on purpose: other truthy values take the else branch.
    if useSpades == True:
        cTestLib.assignCoverageFromHeader(
            contigGraph, folderName, contigsFilename, targetToSourceContigsNamesDic)
    else:
        cTestLib.assignCoverageFromDataList(
            contigGraph, alignmentRecords, folderName, contigsFilename)

    scoredMerges = cTestLib.calculateConfidenceScore(contigGraph, candidateMerges)
    scoresWithDummy, dummyController = setCoverLib.assignRepeatedNodesToDummy(scoredMerges)

    rankingLib.rankAndMerge(
        folderName, contigNames, contigsFilename, readsFilename,
        scoresWithDummy, gapReadLookup, mScoreThres, conScoreThres,
        scoreListOutputName, outputContigsFilename, dummyController)
def test_rankAndMerge(self):
    """Run rankAndMerge on two dummy contigs and one dummy read and check the merged sequence.

    Writes the contigs "AAACCC" and "CCCTTTT" and the read "CCCGGGCCC" as FASTA,
    supplies one scored merge candidate plus the pair of records stored under the
    gap look-up key, and asserts that the first record of "improved.fasta" is
    "AAACCC" + "GGG" + "CCCTTTT".
    """
    # Fixture files: two contigs and a single read.
    contigRecords = [
        SeqRecord(Seq("AAACCC", generic_dna), id="ContigDummyL", description=""),
        SeqRecord(Seq("CCCTTTT", generic_dna), id="ContigDummyR", description=""),
    ]
    SeqIO.write(contigRecords, self.folderName + self.contigsFilename, "fasta")

    readRecords = [SeqRecord(Seq("CCCGGGCCC", generic_dna), id="ReadDummy", description="")]
    SeqIO.write(readRecords, self.folderName + self.readsFilename, "fasta")

    # One candidate merge between the two contigs.
    mergeScores = [['ContigDummyL_R~ContigDummyR_L~1', 1, 1]]

    # Records for the read against each contig (presumably alignment rows; field
    # meanings are defined by the alignment library, not visible here).
    leftRecord = [4, 6, 1, 3, 3, 3, 100.0, 6, 9, 'ContigDummyL', 'ReadDummy']
    rightRecord = [1, 3, 7, 9, 3, 3, 100.0, 7, 9, 'ContigDummyR', 'ReadDummy']
    gapReadLookup = {'ContigDummyL_p-ContigDummyR_p': [[leftRecord, rightRecord]]}

    contigNames = alignmentLib.findContigsNames(self.folderName, self.contigsFilename)

    # Identity mapping in both directions: each contig maps to itself.
    dummyController = setCoverLib.dummyNodeController()
    dummyController.realToDummyDic = {name: name for name in ('ContigDummyL', 'ContigDummyR')}
    dummyController.dummyToRealDic = {name: name for name in ('ContigDummyL', 'ContigDummyR')}

    rankingLib.rankAndMerge(
        self.folderName, contigNames, self.contigsFilename, self.readsFilename,
        mergeScores, gapReadLookup, 1, 0.95, "scoreList.json", "improved.fasta",
        dummyController)

    expectedSequence = "AAACCC" + "GGG" + "CCCTTTT"
    mergedRecords = list(SeqIO.parse(self.folderName + "improved.fasta", "fasta"))
    assert expectedSequence == str(mergedRecords[0].seq)
def mainFlow(folderName, mummerLink, inputContigsFilename, inputReadsFilename, useSpades, noAlignment,
             scoreListOutputName, outputContigsFilename, mScoreThres, conScoreThres, setCoverOption):
    """Drive the read-to-contig pipeline from header transformation to rankAndMerge.

    The steps run strictly in sequence; each one feeds the next:
    header transformation -> read/contig alignment extraction -> connecting and
    spanning reads -> contig graph -> candidate merges -> coverage assignment ->
    confidence scoring -> dummy-node assignment -> rank and merge.

    Args:
        folderName: Working folder handed to every library call that touches files.
        mummerLink: Forwarded to alignmentLib.extractRead2Contig.
        inputContigsFilename: Contigs file name; the working copy is named "tmp" + this.
        inputReadsFilename: Reads file name; the working copy is named "tmp" + this.
        useSpades: When equal to True, coverage is assigned from the contig headers;
            otherwise it is assigned from the alignment data list.
        noAlignment: Forwarded to transformFileHeaders and extractRead2Contig.
        scoreListOutputName: Forwarded to rankingLib.rankAndMerge.
        outputContigsFilename: Forwarded to rankingLib.rankAndMerge.
        mScoreThres: Forwarded to rankingLib.rankAndMerge.
        conScoreThres: Forwarded to rankingLib.rankAndMerge.
        setCoverOption: Forwarded to setCoverLib.extendConnectivityFromReads.
    """
    # Fixed settings forwarded to the alignment extraction step.
    alignmentOutputHeader = "readToContigHeader"
    numSplits = 20
    numParallel = 20

    # Working file names derived from the inputs.
    contigsFilename = "tmp" + inputContigsFilename
    readsFilename = "tmp" + inputReadsFilename

    targetToSourceContigsNamesDic = houseKeeperLib.transformFileHeaders(
        folderName, inputContigsFilename, contigsFilename, noAlignment)
    # The reads header map is not read again in this function; the call itself is kept.
    targetToSourceReadsNamesDic = houseKeeperLib.transformFileHeaders(
        folderName, inputReadsFilename, readsFilename, noAlignment)

    alignmentRecords = alignmentLib.extractRead2Contig(
        folderName, mummerLink, readsFilename, contigsFilename,
        numSplits, alignmentOutputHeader, numParallel, noAlignment)

    connectingReads = readConnectivityLib.findConnectingReadsList(alignmentRecords)
    spanningReads, gapReadLookup = readConnectivityLib.findSpanReadsList(connectingReads)

    contigNames = alignmentLib.findContigsNames(folderName, contigsFilename)
    contigGraph = graphLib.formContigGraph(spanningReads, contigNames)
    condenseCandidates = contigGraph.findCondenseCandidatesList()
    edgeMultiplicity = contigGraph.findEdgeMultiplicity()

    candidateMerges = setCoverLib.extendConnectivityFromReads(
        condenseCandidates, connectingReads, contigNames, setCoverOption, edgeMultiplicity)

    # Explicit comparison with True is kept on purpose: other truthy values take the else branch.
    if useSpades == True:
        cTestLib.assignCoverageFromHeader(
            contigGraph, folderName, contigsFilename, targetToSourceContigsNamesDic)
    else:
        cTestLib.assignCoverageFromDataList(
            contigGraph, alignmentRecords, folderName, contigsFilename)

    scoredMerges = cTestLib.calculateConfidenceScore(contigGraph, candidateMerges)
    scoresWithDummy, dummyController = setCoverLib.assignRepeatedNodesToDummy(scoredMerges)

    rankingLib.rankAndMerge(
        folderName, contigNames, contigsFilename, readsFilename,
        scoresWithDummy, gapReadLookup, mScoreThres, conScoreThres,
        scoreListOutputName, outputContigsFilename, dummyController)
def test_rankAndMerge(self):
    """Run rankAndMerge on two dummy contigs and one dummy read and check the merged sequence.

    Writes the contigs "AAACCC" and "CCCTTTT" and the read "CCCGGGCCC" as FASTA,
    supplies one scored merge candidate plus the pair of records stored under the
    gap look-up key, and asserts that the first record of "improved.fasta" is
    "AAACCC" + "GGG" + "CCCTTTT".
    """
    # Fixture files: two contigs and a single read.
    contigRecords = [
        SeqRecord(Seq("AAACCC", generic_dna), id="ContigDummyL", description=""),
        SeqRecord(Seq("CCCTTTT", generic_dna), id="ContigDummyR", description=""),
    ]
    SeqIO.write(contigRecords, self.folderName + self.contigsFilename, "fasta")

    readRecords = [SeqRecord(Seq("CCCGGGCCC", generic_dna), id="ReadDummy", description="")]
    SeqIO.write(readRecords, self.folderName + self.readsFilename, "fasta")

    # One candidate merge between the two contigs.
    mergeScores = [["ContigDummyL_R~ContigDummyR_L~1", 1, 1]]

    # Records for the read against each contig (presumably alignment rows; field
    # meanings are defined by the alignment library, not visible here).
    leftRecord = [4, 6, 1, 3, 3, 3, 100.0, 6, 9, "ContigDummyL", "ReadDummy"]
    rightRecord = [1, 3, 7, 9, 3, 3, 100.0, 7, 9, "ContigDummyR", "ReadDummy"]
    gapReadLookup = {"ContigDummyL_p-ContigDummyR_p": [[leftRecord, rightRecord]]}

    contigNames = alignmentLib.findContigsNames(self.folderName, self.contigsFilename)

    # Identity mapping in both directions: each contig maps to itself.
    dummyController = setCoverLib.dummyNodeController()
    dummyController.realToDummyDic = {name: name for name in ("ContigDummyL", "ContigDummyR")}
    dummyController.dummyToRealDic = {name: name for name in ("ContigDummyL", "ContigDummyR")}

    rankingLib.rankAndMerge(
        self.folderName, contigNames, self.contigsFilename, self.readsFilename,
        mergeScores, gapReadLookup, 1, 0.95, "scoreList.json", "improved.fasta",
        dummyController)

    expectedSequence = "AAACCC" + "GGG" + "CCCTTTT"
    mergedRecords = list(SeqIO.parse(self.folderName + "improved.fasta", "fasta"))
    assert expectedSequence == str(mergedRecords[0].seq)
def test_findContigsNames(self):
    """Check that findContigsNames returns ['ContigDummy'] for the simple FASTA fixture.

    The fixture is produced by self.createSimpleFasta() (defined elsewhere in the
    test class; presumably it writes a single contig named "ContigDummy").
    """
    self.createSimpleFasta()
    expectedNames = ['ContigDummy']
    foundNames = alignmentLib.findContigsNames(self.folderName, self.contigsFilename)
    assert foundNames == expectedNames
def test_findContigsNames(self):
    """Check that findContigsNames returns ["ContigDummy"] for the simple FASTA fixture.

    The fixture is produced by self.createSimpleFasta() (defined elsewhere in the
    test class; presumably it writes a single contig named "ContigDummy").
    """
    self.createSimpleFasta()
    expectedNames = ["ContigDummy"]
    foundNames = alignmentLib.findContigsNames(self.folderName, self.contigsFilename)
    assert foundNames == expectedNames