示例#1
0
    def qa(self, options):
        """QA command"""
        self.logger.info('[CheckM - qa] Tabulating genome statistics.')

        checkDirExists(options.analyze_dir)

        if options.exclude_markers:
            checkFileExists(options.exclude_markers)

        # calculate AAI between marks with multiple hits in a single bin
        aai = AminoAcidIdentity()
        aai.run(options.aai_strain, options.analyze_dir,
                options.alignment_file)

        # get HMM file for each bin

        markerSetParser = MarkerSetParser(options.threads)

        hmmModelInfoFile = os.path.join(options.analyze_dir, 'storage',
                                        DefaultValues.CHECKM_HMM_MODEL_INFO)
        binIdToModels = markerSetParser.loadBinModels(hmmModelInfoFile)

        binIdToBinMarkerSets = markerSetParser.getMarkerSets(
            options.analyze_dir, getBinIdsFromOutDir(options.analyze_dir),
            options.marker_file, options.exclude_markers)

        # get results for each bin
        RP = ResultsParser(binIdToModels)
        RP.analyseResults(
            options.analyze_dir,
            DefaultValues.BIN_STATS_OUT,
            DefaultValues.HMMER_TABLE_OUT,
            bIgnoreThresholds=options.bIgnoreThresholds,
            evalueThreshold=options.e_value,
            lengthThreshold=options.length,
            bSkipPseudoGeneCorrection=options.bSkipPseudoGeneCorrection,
            bSkipAdjCorrection=options.bSkipAdjCorrection)

        RP.printSummary(options.out_format,
                        aai,
                        binIdToBinMarkerSets,
                        options.bIndividualMarkers,
                        options.coverage_file,
                        options.bTabTable,
                        options.file,
                        anaFolder=options.analyze_dir)
        RP.cacheResults(options.analyze_dir, binIdToBinMarkerSets,
                        options.bIndividualMarkers)

        if options.file != '':
            self.logger.info('QA information written to: ' + options.file)

        self.timeKeeper.printTimeStamp()
示例#2
0
    def treeQA(self, options):
        """QA command"""
        self.logger.info(
            '[CheckM - tree_qa] Assessing phylogenetic markers found in each bin.'
        )

        checkDirExists(options.tree_dir)

        # set HMM file for each bin
        markerSetParser = MarkerSetParser()
        hmmModelInfoFile = os.path.join(options.tree_dir, 'storage',
                                        DefaultValues.PHYLO_HMM_MODEL_INFO)
        binIdToModels = markerSetParser.loadBinModels(hmmModelInfoFile)

        # calculate marker gene statistics
        RP = ResultsParser(binIdToModels)
        binStats = RP.analyseResults(options.tree_dir,
                                     DefaultValues.BIN_STATS_PHYLO_OUT,
                                     DefaultValues.HMMER_TABLE_PHYLO_OUT)

        # determine taxonomy of each bin

        treeParser = TreeParser()
        treeParser.printSummary(options.out_format, options.tree_dir, RP,
                                options.bTabTable, options.file, binStats)

        if options.file != '':
            self.logger.info('QA information written to: ' + options.file)

        self.timeKeeper.printTimeStamp()
示例#3
0
    def lineageSet(self, options, db=None):
        """Lineage set command"""
        self.logger.info(
            '[CheckM - lineage_set] Inferring lineage-specific marker sets.')

        checkDirExists(options.tree_dir)

        # set HMM file for each bin
        markerSetParser = MarkerSetParser()
        hmmModelInfoFile = os.path.join(options.tree_dir, 'storage',
                                        DefaultValues.PHYLO_HMM_MODEL_INFO)
        binIdToModels = markerSetParser.loadBinModels(hmmModelInfoFile)

        # calculate marker gene statistics
        resultsParser = ResultsParser(binIdToModels)
        resultsParser.analyseResults(options.tree_dir,
                                     DefaultValues.BIN_STATS_PHYLO_OUT,
                                     DefaultValues.HMMER_TABLE_PHYLO_OUT)

        # These options are incompatible with how the lineage-specific marker set is selected, so
        # the default values are currently hard-coded

        options.num_genomes_markers = 2
        options.bootstrap = 0
        options.bRequireTaxonomy = False

        treeParser = TreeParser()
        treeParser.getBinMarkerSets(
            options.tree_dir, options.marker_file, options.num_genomes_markers,
            options.bootstrap, options.bNoLineageSpecificRefinement,
            options.bForceDomain, options.bRequireTaxonomy, resultsParser,
            options.unique, options.multi)

        self.logger.info('Marker set written to: ' + options.marker_file)

        self.timeKeeper.printTimeStamp()