示例#1
0
    def qa(self, options):
        """Run the ``qa`` command and tabulate statistics for the bins.

        Works on the contents of ``options.analyze_dir``: computes AAI for
        markers hit several times within a bin, loads the per-bin HMM models
        and marker sets, parses the HMMER results, prints the summary in the
        requested format and caches the results in the same directory.

        Args:
            options: Parsed command-line options. The attributes read here
                are ``analyze_dir``, ``exclude_markers``, ``aai_strain``,
                ``alignment_file``, ``threads``, ``marker_file``,
                ``bIgnoreThresholds``, ``e_value``, ``length``,
                ``bSkipPseudoGeneCorrection``, ``bSkipAdjCorrection``,
                ``out_format``, ``bIndividualMarkers``, ``coverage_file``,
                ``bTabTable`` and ``file``.
        """
        self.logger.info('[CheckM - qa] Tabulating genome statistics.')

        # validate inputs before doing any work
        analyzeDir = options.analyze_dir
        checkDirExists(analyzeDir)

        excludedMarkers = options.exclude_markers
        if excludedMarkers:
            checkFileExists(excludedMarkers)

        # calculate AAI between markers with multiple hits in a single bin
        aai = AminoAcidIdentity()
        aai.run(options.aai_strain, analyzeDir, options.alignment_file)

        # load the HMM models and the marker sets associated with each bin
        markerSetParser = MarkerSetParser(options.threads)

        modelInfoPath = os.path.join(
            analyzeDir, 'storage', DefaultValues.CHECKM_HMM_MODEL_INFO)
        binIdToModels = markerSetParser.loadBinModels(modelInfoPath)

        binIds = getBinIdsFromOutDir(analyzeDir)
        binIdToBinMarkerSets = markerSetParser.getMarkerSets(
            analyzeDir, binIds, options.marker_file, options.exclude_markers)

        # parse the stored results for each bin
        filterSettings = {
            'bIgnoreThresholds': options.bIgnoreThresholds,
            'evalueThreshold': options.e_value,
            'lengthThreshold': options.length,
            'bSkipPseudoGeneCorrection': options.bSkipPseudoGeneCorrection,
            'bSkipAdjCorrection': options.bSkipAdjCorrection,
        }
        resultsParser = ResultsParser(binIdToModels)
        resultsParser.analyseResults(analyzeDir,
                                     DefaultValues.BIN_STATS_OUT,
                                     DefaultValues.HMMER_TABLE_OUT,
                                     **filterSettings)

        # report the summary, then cache the results alongside the analysis
        resultsParser.printSummary(options.out_format,
                                   aai,
                                   binIdToBinMarkerSets,
                                   options.bIndividualMarkers,
                                   options.coverage_file,
                                   options.bTabTable,
                                   options.file,
                                   anaFolder=analyzeDir)
        resultsParser.cacheResults(analyzeDir,
                                   binIdToBinMarkerSets,
                                   options.bIndividualMarkers)

        # an empty file name means no output file was requested
        outputFile = options.file
        if outputFile != '':
            self.logger.info('QA information written to: ' + outputFile)

        self.timeKeeper.printTimeStamp()
示例#2
0
    def binQAPlot(self, options):
        """Run the ``bin_qa_plot`` command and draw a bar plot of bin quality.

        Collects the bin files in ``options.bin_dir``, reads the extended bin
        statistics from ``options.results_dir`` and plots them. Unless
        ``options.bIgnoreHetero`` is set, AAI is computed first and its
        heterogeneity data is passed to the plot. If the plot call reports
        success, the figure is saved as ``bin_qa_plot.<image_type>`` in
        ``options.output_dir``.

        Args:
            options: Parsed command-line options. The attributes read here
                are ``bin_dir``, ``output_dir``, ``extension``,
                ``results_dir``, ``bIgnoreHetero``, ``aai_strain``,
                ``image_type`` and ``dpi``; the whole object is also handed
                to ``BinQAPlot``.
        """
        self.logger.info(
            '[CheckM - bin_qa_plot] Creating bar plot of bin quality.')

        checkDirExists(options.bin_dir)
        makeSurePathExists(options.output_dir)

        binFiles = self.binFiles(options.bin_dir, options.extension)

        # read sequence stats file (no HMM model information is needed here)
        statsParser = ResultsParser(None)
        binStatsExt = statsParser.parseBinStatsExt(options.results_dir)

        # create plot for each bin
        plot = BinQAPlot(options)

        # heterogeneity data is only computed when it has not been disabled
        aaiHetero = None
        if not options.bIgnoreHetero:
            aai = AminoAcidIdentity()
            aai.run(options.aai_strain, options.results_dir, None)
            aaiHetero = aai.aaiHetero

        plotCreated = plot.plot(binFiles, binStatsExt,
                                options.bIgnoreHetero, aaiHetero)

        if plotCreated:
            imageName = 'bin_qa_plot.' + options.image_type
            outputFile = os.path.join(options.output_dir, imageName)
            plot.savePlot(outputFile, dpi=options.dpi)

            self.logger.info('Plot written to: ' + outputFile)

        self.timeKeeper.printTimeStamp()