def qa(self, options):
    """QA command.

    Tabulates statistics for the bins found in ``options.analyze_dir``:
    runs the amino acid identity step, loads the per-bin HMM models and
    marker sets, parses the stored results, then prints and caches the
    summary.

    Parameters
    ----------
    options
        Parsed command-line options. Attributes read here:
        ``analyze_dir``, ``exclude_markers``, ``aai_strain``,
        ``alignment_file``, ``threads``, ``marker_file``,
        ``bIgnoreThresholds``, ``e_value``, ``length``,
        ``bSkipPseudoGeneCorrection``, ``bSkipAdjCorrection``,
        ``out_format``, ``bIndividualMarkers``, ``coverage_file``,
        ``bTabTable`` and ``file``.
    """
    self.logger.info('[CheckM - qa] Tabulating genome statistics.')

    analyzeDir = options.analyze_dir

    # validate inputs before doing any work
    checkDirExists(analyzeDir)
    if options.exclude_markers:
        checkFileExists(options.exclude_markers)

    # AAI between markers that have multiple hits within a single bin
    aminoAcidIdentity = AminoAcidIdentity()
    aminoAcidIdentity.run(options.aai_strain,
                          analyzeDir,
                          options.alignment_file)

    # HMM models and marker sets for each bin
    msParser = MarkerSetParser(options.threads)
    modelInfoPath = os.path.join(analyzeDir,
                                 'storage',
                                 DefaultValues.CHECKM_HMM_MODEL_INFO)
    modelsByBin = msParser.loadBinModels(modelInfoPath)

    binIds = getBinIdsFromOutDir(analyzeDir)
    markerSetsByBin = msParser.getMarkerSets(analyzeDir,
                                             binIds,
                                             options.marker_file,
                                             options.exclude_markers)

    # parse the results for each bin
    parser = ResultsParser(modelsByBin)
    parser.analyseResults(
        analyzeDir,
        DefaultValues.BIN_STATS_OUT,
        DefaultValues.HMMER_TABLE_OUT,
        bIgnoreThresholds=options.bIgnoreThresholds,
        evalueThreshold=options.e_value,
        lengthThreshold=options.length,
        bSkipPseudoGeneCorrection=options.bSkipPseudoGeneCorrection,
        bSkipAdjCorrection=options.bSkipAdjCorrection)

    # report, then cache so later commands can reuse the results
    parser.printSummary(options.out_format,
                        aminoAcidIdentity,
                        markerSetsByBin,
                        options.bIndividualMarkers,
                        options.coverage_file,
                        options.bTabTable,
                        options.file,
                        anaFolder=analyzeDir)
    parser.cacheResults(analyzeDir,
                        markerSetsByBin,
                        options.bIndividualMarkers)

    # an empty string means no output file was requested
    if options.file != '':
        self.logger.info('QA information written to: ' + options.file)

    self.timeKeeper.printTimeStamp()
def binQAPlot(self, options):
    """Bin QA plot command.

    Builds a bar plot of bin quality from the bins in ``options.bin_dir``
    and the extended bin statistics parsed from ``options.results_dir``.
    If the plot is produced it is saved as ``bin_qa_plot.<image_type>``
    in ``options.output_dir``.

    Parameters
    ----------
    options
        Parsed command-line options. Attributes read here: ``bin_dir``,
        ``output_dir``, ``extension``, ``results_dir``,
        ``bIgnoreHetero``, ``aai_strain``, ``image_type`` and ``dpi``.
        The whole object is also handed to ``BinQAPlot``.
    """
    self.logger.info(
        '[CheckM - bin_qa_plot] Creating bar plot of bin quality.')

    checkDirExists(options.bin_dir)
    makeSurePathExists(options.output_dir)

    genomeFiles = self.binFiles(options.bin_dir, options.extension)

    # read sequence stats file
    statsParser = ResultsParser(None)
    extendedStats = statsParser.parseBinStatsExt(options.results_dir)

    # create plot for each bin
    qaPlot = BinQAPlot(options)

    # heterogeneity data is only computed when it is not being ignored
    heterogeneity = None
    if not options.bIgnoreHetero:
        aminoAcidIdentity = AminoAcidIdentity()
        aminoAcidIdentity.run(options.aai_strain, options.results_dir, None)
        heterogeneity = aminoAcidIdentity.aaiHetero

    plotCreated = qaPlot.plot(genomeFiles,
                              extendedStats,
                              options.bIgnoreHetero,
                              heterogeneity)

    # plot() reports whether there is anything to save
    if plotCreated:
        imagePath = os.path.join(options.output_dir,
                                 'bin_qa_plot.' + options.image_type)
        qaPlot.savePlot(imagePath, dpi=options.dpi)
        self.logger.info('Plot written to: ' + imagePath)

    self.timeKeeper.printTimeStamp()