def list(self, rankFilter='ALL'):
    """List all available marker sets at the given taxonomic rank ('ALL' prints every rank)."""
    markerSetsByRank = self.readMarkerSets()

    columns = ['Rank', 'Taxon', '# genomes', '# marker genes', '# marker sets']
    table = prettytable.PrettyTable(columns)
    table.align = 'c'
    table.align['Rank'] = 'l'
    table.align['Taxon'] = 'l'
    table.hrules = prettytable.FRAME
    table.vrules = prettytable.NONE

    for rank in taxonomicRanks:
        # skip ranks the caller did not ask for
        if rankFilter not in ('ALL', rank):
            continue

        for taxon in sorted(markerSetsByRank[rank]):
            ms = markerSetsByRank[rank][taxon]
            geneCount, setCount = ms.size()
            table.add_row([rank, taxon, ms.numGenomes, geneCount, setCount])

    print('')
    print(table.get_string())
def __printSimpleSummaryTable(self, binIdToTaxonomy, resultsParser, bTabTable, outFile):
    """Print a per-bin summary (bin id, unique/multi-copy marker counts, taxonomy).

    Output goes to outFile (or stdout when outFile indicates stdout) either as
    tab-separated rows (bTabTable True) or a prettytable sorted by marker count.
    """
    # redirect output
    oldStdOut = reassignStdOut(outFile)

    # FIX: dict views are not subscriptable in Python 3; materialize before
    # indexing (matches the idiom used elsewhere in this file).
    arbitraryBinId = list(binIdToTaxonomy.keys())[0]
    markerCountLabel = '# unique markers (of %d)' % len(resultsParser.models[arbitraryBinId])
    header = ['Bin Id', markerCountLabel, '# multi-copy', 'Taxonomy']

    if bTabTable:
        pTable = None
        print('\t'.join(header))
    else:
        pTable = prettytable.PrettyTable(header)
        pTable.float_format = '.2'
        pTable.align = 'c'
        pTable.align[header[0]] = 'l'
        pTable.align['Taxonomy'] = 'l'
        pTable.hrules = prettytable.FRAME
        pTable.vrules = prettytable.NONE

    for binId in sorted(binIdToTaxonomy.keys()):
        uniqueHits, multiCopyHits = resultsParser.results[binId].countUniqueHits()
        row = [binId, uniqueHits, multiCopyHits, binIdToTaxonomy[binId]]

        if bTabTable:
            print('\t'.join(map(str, row)))
        else:
            pTable.add_row(row)

    if not bTabTable:
        # most complete bins (highest marker count) first
        print(pTable.get_string(sortby=markerCountLabel, reversesort=True))

    # restore stdout
    restoreStdOut(outFile, oldStdOut)
def printSummary(self, outputFormat, aai, binIdToBinMarkerSets, bIndividualMarkers,
                 coverageFile, bTabTable, outFile, anaFolder):
    """Print a summary of marker-gene results for every bin in self.results.

    Delegates per-bin formatting to each result's printSummary(); optionally
    augments rows with coverage profiles read from coverageFile. Only output
    formats 1, 2, 3 and 9 support prettytable rendering; anything else is
    forced to tab-separated output.
    """
    # redirect output
    oldStdOut = reassignStdOut(outFile)

    coverageBinProfiles = None
    if coverageFile:
        coverage = Coverage(1)
        coverageBinProfiles = coverage.binProfiles(coverageFile)

    prettyTableFormats = [1, 2, 3, 9]

    header = self.__getHeader(outputFormat,
                              binIdToBinMarkerSets[list(binIdToBinMarkerSets.keys())[0]],
                              coverageBinProfiles,
                              bTabTable)

    if bTabTable or outputFormat not in prettyTableFormats:
        bTabTable = True
        pTable = None

        # FIX: identity comparison with None (was "header != None")
        if header is not None:
            print('\t'.join(header))
    else:
        pTable = prettytable.PrettyTable(header)
        pTable.float_format = '.2'
        pTable.align = 'c'
        pTable.align[header[0]] = 'l'
        pTable.hrules = prettytable.FRAME
        pTable.vrules = prettytable.NONE

    seqsReported = 0
    for binId in sorted(self.results.keys()):
        seqsReported += self.results[binId].printSummary(outputFormat, aai,
                                                         binIdToBinMarkerSets[binId],
                                                         bIndividualMarkers,
                                                         coverageBinProfiles, pTable,
                                                         anaFolder)

    # formats 6 and 7 report individual marker genes and may produce no rows
    if outputFormat in [6, 7] and seqsReported == 0:
        print('[No marker genes satisfied the reporting criteria.]')

    if not bTabTable:
        if outputFormat in [1, 2]:
            print(pTable.get_string(sortby='Completeness', reversesort=True))
        else:
            # only print if there are rows
            if pTable.get_string(print_empty=False):
                print(pTable.get_string(print_empty=False))

    # restore stdout
    restoreStdOut(outFile, oldStdOut)
def __printFullTable(self, binIdToUID, binIdToTaxonomy, binIdToSisterTaxonomy,
                     binIdToLineageStatistics, resultsParser, binStats, bTabTable,
                     outFile):
    """Print an extended per-bin table: marker counts, placement, genome and
    lineage statistics. Rendered tab-separated (bTabTable True) or as a
    prettytable sorted by marker count."""
    # redirect output
    oldStdOut = reassignStdOut(outFile)

    firstBinId = list(binIdToTaxonomy.keys())[0]
    markerCountLabel = '# unique markers (of %d)' % len(resultsParser.models[firstBinId])

    header = ['Bin Id', markerCountLabel, "# multi-copy",
              'Insertion branch UID', 'Taxonomy (contained)',
              'Taxonomy (sister lineage)',
              'GC', 'Genome size (Mbp)', 'Gene count', 'Coding density',
              'Translation table',
              '# descendant genomes', 'Lineage: GC mean', 'Lineage: GC std',
              'Lineage: genome size (Mbp) mean', 'Lineage: genome size (Mbp) std',
              'Lineage: gene count mean', 'Lineage: gene count std']

    if bTabTable:
        table = None
        print(('\t'.join(header)))
    else:
        table = prettytable.PrettyTable(header)
        table.float_format = '.2'
        table.float_format['GC'] = '.1'
        table.float_format['Lineage: GC mean'] = '.1'
        table.float_format['Lineage: GC std'] = '.1'
        table.float_format['Lineage: gene count mean'] = '.0'
        table.float_format['Lineage: gene count std'] = '.0'
        table.align = 'c'
        table.align[header[0]] = 'l'
        table.align['Insertion branch UID'] = 'l'
        table.align['Taxonomy (contained)'] = 'l'
        table.align['Taxonomy (sister lineage)'] = 'l'
        table.hrules = prettytable.FRAME
        table.vrules = prettytable.NONE

    for binId in sorted(binIdToTaxonomy.keys()):
        uniqueHits, multiCopyHits = resultsParser.results[binId].countUniqueHits()

        # strip the contained lineage's taxa from the sister lineage so only
        # the divergent suffix remains
        sisterLineage = binIdToSisterTaxonomy[binId]
        for taxon in binIdToTaxonomy[binId].split(';'):
            sisterLineage = sisterLineage.replace(taxon + ';', '')

        if not sisterLineage:
            sisterLineage = 'unresolved'
        elif sisterLineage[-1] == ';':
            sisterLineage = sisterLineage[:-1]

        stats = binStats[binId]
        lineage = binIdToLineageStatistics[binId]

        row = [binId, uniqueHits, multiCopyHits,
               binIdToUID[binId], binIdToTaxonomy[binId], sisterLineage,
               stats['GC'] * 100,
               float(stats['Genome size']) / 1e6,
               stats['# predicted genes'],
               stats['Coding density'],
               stats['Translation table'],
               lineage['# genomes'],
               lineage['gc mean'],
               lineage['gc std'],
               lineage['genome size mean'],
               lineage['genome size std'],
               lineage['gene count mean'],
               lineage['gene count std']]

        if bTabTable:
            print(('\t'.join(map(str, row))))
        else:
            table.add_row(row)

    if not bTabTable:
        print((table.get_string(sortby=markerCountLabel, reversesort=True)))

    # restore stdout
    restoreStdOut(outFile, oldStdOut)
def run(self, coverageFile, outFile, bTabTable):
    """Build and print a community profile from a CheckM coverage file.

    For each bin and BAM file, reports: reads mapped, % of mapped reads,
    % of binned populations (read counts normalized by bin size), and
    % of the whole community (binned % scaled by the unbinned fraction).
    Output is tab-separated (bTabTable True) or a prettytable.
    """
    checkFileExists(coverageFile)

    # get number of reads mapped to each bin
    self.logger.info('Determining number of reads mapped to each bin.')
    readsMappedToBin = {}
    binSize = {}
    totalMappedReads = {}
    bHeader = True
    # FIX: open the coverage file in a context manager so the handle is
    # always closed (the original iterated a bare open() and leaked it)
    with open(coverageFile) as f:
        for line in f:
            if bHeader:
                bHeader = False
                continue

            lineSplit = line.split('\t')

            # seqId = lineSplit[0]
            binId = lineSplit[1]

            seqLen = int(lineSplit[2])
            binSize[binId] = binSize.get(binId, 0) + seqLen

            if binId not in readsMappedToBin:
                readsMappedToBin[binId] = {}

            # columns come in triplets of (bamId, coverage, mapped reads)
            for i in range(3, len(lineSplit), 3):
                bamId = lineSplit[i]
                mappedReads = int(lineSplit[i + 2])

                totalMappedReads[bamId] = totalMappedReads.get(bamId, 0) + mappedReads
                readsMappedToBin[binId][bamId] = readsMappedToBin[binId].get(
                    bamId, 0) + mappedReads

    # calculate percentage of mapped reads to binned populations
    perMappedReads = {}
    normBinCoverage = {}
    sumNormBinCoverage = {}
    for binId, bamIds in readsMappedToBin.items():
        perMappedReads[binId] = {}
        normBinCoverage[binId] = {}

        for bamId in bamIds:
            perMR = float(readsMappedToBin[binId][bamId]) / totalMappedReads[bamId]
            perMappedReads[binId][bamId] = perMR

            # unbinned sequences have no meaningful per-size coverage
            if binId == DefaultValues.UNBINNED:
                continue

            normCoverage = perMR / binSize[binId]
            normBinCoverage[binId][bamId] = normCoverage
            sumNormBinCoverage[bamId] = sumNormBinCoverage.get(bamId, 0) + normCoverage

    for binId, bamIds in normBinCoverage.items():
        for bamId in bamIds:
            if sumNormBinCoverage[bamId] != 0:
                normBinCoverage[binId][bamId] /= sumNormBinCoverage[bamId]
            else:
                normBinCoverage[binId][bamId] = 0

    # write community profile
    oldStdOut = reassignStdOut(outFile)

    sortedBinIds = sorted(readsMappedToBin.keys())
    sortedBamIds = sorted(readsMappedToBin[sortedBinIds[0]].keys())

    header = ['Bin Id', 'Bin size (Mbp)']
    for bamId in sortedBamIds:
        header += [bamId + ': mapped reads']
        header += [bamId + ': % mapped reads']
        header += [bamId + ': % binned populations']
        header += [bamId + ': % community']

    if bTabTable:
        pTable = None
        print('\t'.join(header))
    else:
        pTable = prettytable.PrettyTable(header)
        pTable.float_format = '.2'
        pTable.align = 'c'
        pTable.align[header[0]] = 'l'
        pTable.hrules = prettytable.FRAME
        pTable.vrules = prettytable.NONE

    for binId in sortedBinIds:
        row = [binId]
        row += [float(binSize[binId]) / 1e6]

        for bamId in sortedBamIds:
            row += [readsMappedToBin[binId][bamId]]
            row += [perMappedReads[binId][bamId] * 100.0]

            if DefaultValues.UNBINNED in perMappedReads:
                unbinnedPercentage = perMappedReads[DefaultValues.UNBINNED][bamId]
            else:
                unbinnedPercentage = 0

            if binId == DefaultValues.UNBINNED:
                row += ['NA']
                row += [unbinnedPercentage * 100.0]
            else:
                row += [normBinCoverage[binId][bamId] * 100.0]
                # scale binned-population % by the binned fraction of the community
                row += [normBinCoverage[binId][bamId] * 100.0 * (1.0 - unbinnedPercentage)]

        if bTabTable:
            print('\t'.join(list(map(str, row))))
        else:
            pTable.add_row(row)

    if not bTabTable:
        print(pTable.get_string())

    # restore stdout
    restoreStdOut(outFile, oldStdOut)