Пример #1
0
    def __init__(self, dbFileName, force=False, scaleFactor=1000):
        """Initialise every attribute to an empty placeholder value.

        dbFileName  -- db containing all the data we'd like to use
        force       -- kept as forceWriting: overwrite existing values
                       silently?
        scaleFactor -- kept as scaleFactor: everything in the transformed
                       data is scaled to this dimension
        """
        # ---- data ----
        # most data is saved to hdf
        self.dataManager = GMDataManager()
        # db containing all the data we'd like to use
        self.dbFileName = dbFileName
        # condition will be supplied at loading time
        self.condition = ""

        # ---- per-contig arrays ----
        # NOTE: ALL of the arrays in this section are in sync; each one
        # holds information for an individual contig.

        # indices into the data structure based on condition
        self.indices = np_array([])
        # coverage based coordinates
        self.covProfiles = np_array([])
        # the munged data points
        self.transformedCP = np_array([])
        # the corners of the transformed space
        self.corners = np_array([])
        # the centre of the coverage space
        self.TCentre = 0.
        # distance from corner to centre of transformed space
        self.transRadius = 0.
        # average coverage across all stoits
        self.averageCoverages = np_array([])
        # norm of the raw coverage vectors
        self.normCoverages = np_array([])
        # raw kmer signatures
        self.kmerSigs = np_array([])
        # first PC of kmer sigs normalized to [0, 1]
        self.kmerNormPC1 = np_array([])
        # PCs of kmer sigs capturing specified variance
        self.kmerPCs = np_array([])
        # variance of each PC
        self.kmerVarPC = np_array([])
        self.stoitColNames = np_array([])
        self.contigNames = np_array([])
        self.contigLengths = np_array([])
        self.contigGCs = np_array([])
        self.colorMapGC = None

        # list of bin IDs
        self.binIds = np_array([])
        # ---- end of per-contig arrays ----

        # ---- meta ----
        # valid bin ids -> numMembers
        self.validBinIds = {}
        # indicates if a bin is likely to be chimeric
        self.isLikelyChimeric = {}
        # dictionary of those indices which belong to some bin
        self.binnedRowIndices = {}
        # dictionary of those indices which can not be binned yet
        self.restrictedRowIndices = {}
        # this depends on the condition given
        self.numContigs = 0
        # this depends on the data which was parsed
        self.numStoits = 0

        # ---- contig links ----
        self.links = {}

        # ---- misc ----
        # overwrite existing values silently?
        self.forceWriting = force
        # scale everything in the transformed data to this dimension
        self.scaleFactor = scaleFactor
Пример #2
0
    def parseOptions(self, options):
        """Run the GroopM sub-command selected by options.subparser_name.

        options is the parsed command line namespace. Each branch reads only
        the attributes that belong to its own sub-command.

        Returns 0 after the selected branch has run (also when
        subparser_name matches no branch). Two input validation failures
        print a message and return None instead: fewer than three bam files
        for 'parse', and an unrecognised field name for 'dump'.

        Raises ExtractModeNotAppropriateException when 'extract' is asked
        for a mode other than 'contigs' or 'reads'.
        """
        timer = gtime.TimeKeeper()
        command = options.subparser_name
        rule = "*******************************************************************************"

        # NOTE: print is always handed exactly one pre-formatted string, so
        # the statement form (Python 2) and the function form (Python 3)
        # emit the same text.

        def banner(message):
            # every mode announces itself with the same three line header
            print(rule)
            print(" [[GroopM %s]] %s" % (self.GMVersion, message))
            print(rule)

        def chosenBids():
            # options.bids may be None; callees are given a list instead
            if options.bids is None:
                return []
            return options.bids

        if command == 'parse':
            # parse raw input
            banner("Running in data parsing mode...")
            # check this here:
            if len(options.bamfiles) < 3:
                print("Sorry, You must supply at least 3 bamFiles to use GroopM. "
                      "(You supplied %d)\n Exiting..." % len(options.bamfiles))
                return
            GMdata = mstore.GMDataManager()
            success = GMdata.createDB(options.bamfiles,
                                      options.reference,
                                      options.dbname,
                                      options.cutoff,
                                      timer,
                                      force=options.force,
                                      threads=options.threads)
            if not success:
                print("%s not updated" % (options.dbname,))

        elif command == 'core':
            # make bin cores
            banner("Running in core creation mode...")
            CE = cluster.ClusterEngine(options.dbname,
                                       timer,
                                       force=options.force,
                                       finalPlot=options.plot,
                                       plot=options.multiplot,
                                       minSize=options.size,
                                       minVol=options.bp)
            # makeCores is given "" rather than None when no graph file is set
            gf = "" if options.graphfile is None else options.graphfile
            CE.makeCores(coreCut=options.cutoff, gf=gf)

        elif command == 'refine':
            # refine bin cores
            banner("Running in core refining mode...")
            auto = options.auto
            transform = not options.no_transform

            # bin selection is not exposed for this mode: an empty list is
            # always passed
            RE = refine.RefineEngine(timer,
                                     dbFileName=options.dbname,
                                     transform=transform,
                                     bids=[],
                                     loadContigNames=True)

            pfx = "REFINED" if options.plot else ""
            print("Refine bins")

            RE.refineBins(timer, auto=auto, saveBins=True, plotFinal=pfx)

        elif command == 'recruit':
            # expand existing bins
            banner("Running in bin expansion mode...")
            RE = refine.RefineEngine(timer,
                                     dbFileName=options.dbname,
                                     getUnbinned=True,
                                     loadContigNames=False,
                                     cutOff=options.cutoff)

            RE.recruitWrapper(timer,
                              inclusivity=options.inclusivity,
                              step=options.step,
                              saveBins=True)

        elif command == 'extract':
            # extract data
            banner("Running in '%s' extraction mode..." % (options.mode,))
            BX = groopmUtils.GMExtractor(options.dbname,
                                         bids=chosenBids(),
                                         folder=options.out_folder)
            if options.mode == 'contigs':
                BX.extractContigs(timer,
                                  fasta=options.data,
                                  prefix=options.prefix,
                                  cutoff=options.cutoff)

            elif options.mode == 'reads':
                BX.extractReads(timer,
                                bams=options.data,
                                prefix=options.prefix,
                                mixBams=options.mix_bams,
                                mixGroups=options.mix_groups,
                                mixReads=options.mix_reads,
                                interleaved=options.interleave,
                                bigFile=options.no_gzip,
                                headersOnly=options.headers_only,
                                minMapQual=options.mapping_quality,
                                maxMisMatches=options.max_distance,
                                useSuppAlignments=options.use_supplementary,
                                useSecondaryAlignments=options.use_secondary,
                                verbose=options.verbose,
                                threads=options.threads)

            else:
                raise ExtractModeNotAppropriateException("mode: " +
                                                         options.mode +
                                                         " is unknown")

        elif command == 'merge':
            # merge bins
            banner("Running in bin merging mode...")
            BM = binManager.BinManager(dbFileName=options.dbname)
            BM.loadBins(timer, makeBins=True, silent=False)
            BM.merge(options.bids, options.force, saveBins=True)

        elif command == 'split':
            # split a bin
            banner("Running in bin splitting mode...")
            BM = binManager.BinManager(dbFileName=options.dbname)
            BM.loadBins(timer, makeBins=True, silent=False)
            BM.split(options.bid,
                     options.parts,
                     mode=options.mode,
                     saveBins=True,
                     auto=options.force)

        elif command == 'delete':
            # delete bins
            banner("Running in bin deleting mode...")
            BM = binManager.BinManager(dbFileName=options.dbname)
            BM.loadBins(timer, makeBins=True, silent=True)
            BM.deleteBins(options.bids,
                          force=options.force,
                          saveBins=True,
                          freeBinnedRowIndices=True)

        elif command == 'plot':
            # plot bins
            banner("Running in bin plotting mode...")
            BM = binManager.BinManager(dbFileName=options.dbname)
            BM.loadBins(timer,
                        makeBins=True,
                        silent=False,
                        bids=chosenBids(),
                        loadContigNames=False)

            BM.setColorMap(options.cm)

            BM.plotBins(FNPrefix=options.tag,
                        plotEllipsoid=True,
                        ignoreContigLengths=options.points,
                        folder=options.folder)

        elif command == 'explore':
            # explore bins and contigs
            banner("Running in bin '%s' explorer mode..." % (options.mode,))
            transform = not options.no_transform
            BE = groopmUtils.BinExplorer(options.dbname,
                                         bids=chosenBids(),
                                         transform=transform,
                                         cmstring=options.cm,
                                         ignoreContigLengths=options.points)
            if options.mode == 'binpoints':
                BE.plotPoints(timer)
            elif options.mode == 'binids':
                BE.plotIds(timer)
            elif options.mode == 'allcontigs':
                BE.plotContigs(timer, coreCut=options.cutoff, all=True)
            elif options.mode == 'unbinnedcontigs':
                BE.plotUnbinned(timer, coreCut=options.cutoff)
            elif options.mode == 'binnedcontigs':
                BE.plotContigs(timer, coreCut=options.cutoff)
            elif options.mode == 'binassignments':
                BE.plotBinAssignents(timer, coreCut=options.cutoff)
            elif options.mode == 'compare':
                BE.plotCompare(timer, coreCut=options.cutoff)
            elif options.mode == 'together':
                BE.plotTogether(timer,
                                coreCut=options.cutoff,
                                doMers=options.kmers)
            elif options.mode == 'sidebyside':
                BE.plotSideBySide(timer, coreCut=options.cutoff)
            else:
                # unlike 'extract', an unknown mode here is only reported
                print("**Error: unknown mode: %s" % (options.mode,))

        elif command == 'flyover':
            # make a flyover
            banner("Making a flyover...")
            BE = groopmUtils.BinExplorer(options.dbname,
                                         bids=chosenBids(),
                                         transform=True,
                                         ignoreContigLengths=options.points)
            BE.plotFlyOver(timer,
                           fps=options.fps,
                           totalTime=options.totalTime,
                           percentFade=options.firstFade,
                           prefix=options.prefix,
                           showColorbar=options.colorbar,
                           title=options.title,
                           coreCut=options.cutoff,
                           format=options.format)

        elif command == 'highlight':
            # plot highlights
            banner("Running in highlighter mode...")
            BE = groopmUtils.BinExplorer(options.dbname,
                                         bids=chosenBids(),
                                         binLabelsFile=options.binlabels,
                                         contigColorsFile=options.contigcolors,
                                         ignoreContigLengths=options.points)
            BE.plotHighlights(timer,
                              options.elevation,
                              options.azimuth,
                              options.file,
                              options.filetype,
                              options.dpi,
                              drawRadius=options.radius,
                              show=options.show,
                              coreCut=options.cutoff,
                              testing=options.place)

        elif command == 'print':
            # print bins; this mode prints no banner
            BM = binManager.BinManager(dbFileName=options.dbname)
            BM.loadBins(timer,
                        getUnbinned=options.unbinned,
                        makeBins=True,
                        silent=True,
                        bids=chosenBids())
            BM.printBins(options.format, fileName=options.outfile)

        elif command == 'dump':
            # dump data
            banner("Running in data dumping mode...")

            # prep fields. Do this first because users are most likely to
            # mess this part up!
            allowable_fields = [
                'names', 'mers', 'gc', 'coverage', 'tcoverage', 'ncoverage',
                'lengths', 'bins', 'all'
            ]
            fields = options.fields.split(',')
            for field in fields:
                if field not in allowable_fields:
                    print("ERROR: field '%s' not recognised. Allowable fields are:" % (field,))
                    # tab is joined directly: the Python 2 print statement
                    # puts no separating space after an item ending in a tab
                    print('\t' + ",".join(allowable_fields))
                    return

            # a literal backslash-t on the command line means a real tab
            separator = '\t' if options.separator == '\\t' else options.separator

            DM = GMDataManager()
            DM.dumpData(options.dbname, fields, options.outfile, separator,
                        not options.no_headers)

        return 0