def plotRelativeAbundanceCorrelations(infiles, outfile):
    '''
    Plot how well the estimated species relative abundances correlate
    with the true relative abundances, using only the species shared
    between the two sets.

    Thin wrapper: both arguments are handed unchanged to
    PipelineMetagenomeBenchmark.plotRelativeAbundanceCorrelations and
    nothing is returned.

    NOTE(review): a function with this exact name is defined again
    later in this file, so that later definition shadows this one at
    import time - confirm which copy is meant to survive.
    '''
    plot_correlations = \
        PipelineMetagenomeBenchmark.plotRelativeAbundanceCorrelations
    plot_correlations(infiles, outfile)
def plotRelativeAbundanceCorrelations(infiles, outfile):
    '''
    Plot the correlation between estimated and true species relative
    abundances, restricted to the set of species present in both.

    The work is delegated to
    PipelineMetagenomeBenchmark.plotRelativeAbundanceCorrelations, which
    receives infiles and outfile as given. No value is returned.

    NOTE(review): this redefines a function of the same name that
    appears earlier in the file; being the later definition, this is
    the one bound to the name after import.
    '''
    plot_correlations = \
        PipelineMetagenomeBenchmark.plotRelativeAbundanceCorrelations
    plot_correlations(infiles, outfile)
def buildTrueTaxonomicRelativeAbundances(infiles, outfile):
    '''
    Build the species-level relative abundances for the simulated data.

    This involves creating maps between the different identifiers used
    by the NCBI taxonomy, so that the results can be compared with the
    species-level analysis produced by metaphlan.

    The gi_taxid_nucl table is huge, so this step takes a very long
    time to run - a candidate for optimisation.

    Thin wrapper: infiles and outfile are passed straight through to
    PipelineMetagenomeBenchmark.buildTrueTaxonomicRelativeAbundances
    and nothing is returned.

    NOTE(review): a function with this exact name is defined again
    later in this file and will shadow this definition.
    '''
    # NOTE(review): never read inside this function; presumably a job
    # option picked up by the pipeline framework via introspection -
    # confirm before removing.
    to_cluster = True

    build_abundances = \
        PipelineMetagenomeBenchmark.buildTrueTaxonomicRelativeAbundances
    build_abundances(infiles, outfile)
def buildTrueTaxonomicRelativeAbundances(infiles, outfile):
    '''
    Build relative abundances for the simulated data at different
    levels of the taxonomy.

    This involves creating maps between the different identifiers used
    by the NCBI taxonomy, so that the results can be compared with the
    species-level analysis produced by metaphlan.

    The gi_taxid_nucl table is huge, so this step takes a very long
    time to run - a candidate for optimisation.

    The work is delegated to
    PipelineMetagenomeBenchmark.buildTrueTaxonomicRelativeAbundances,
    which receives infiles and outfile as given. No value is returned.

    NOTE(review): this redefines a function of the same name that
    appears earlier in the file; being the later definition, this is
    the one bound to the name after import.
    '''
    # NOTE(review): never read inside this function; presumably a job
    # option picked up by the pipeline framework via introspection -
    # confirm before removing.
    to_cluster = True

    build_abundances = \
        PipelineMetagenomeBenchmark.buildTrueTaxonomicRelativeAbundances
    build_abundances(infiles, outfile)
def plotCoverageOverGenomes(infile, outfile):
    '''
    Plot the percentage coverage achieved over each genome.

    Thin wrapper: infile and outfile are passed unchanged to
    PipelineMetagenomeBenchmark.plotCoverageOverGenomes and nothing is
    returned.
    '''
    plot_coverage = PipelineMetagenomeBenchmark.plotCoverageOverGenomes
    plot_coverage(infile, outfile)
def calculateFalsePositiveRate(infiles, outfile):
    '''
    Calculate the false positive rate in the taxonomic abundances.

    Thin wrapper: infiles and outfile are passed unchanged to
    PipelineMetagenomeBenchmark.calculateFalsePositiveRate and nothing
    is returned.
    '''
    compute_fpr = PipelineMetagenomeBenchmark.calculateFalsePositiveRate
    compute_fpr(infiles, outfile)