def __init__(self, dbFileName, force=False, scaleFactor=1000):
    """Create the data manager and initialise every attribute to an empty value.

    dbFileName  -- database file containing all the data we'd like to use
    force       -- stored as self.forceWriting: overwrite existing values
                   silently?
    scaleFactor -- dimension to which everything in the transformed data
                   is scaled
    """
    # ---- data ---------------------------------------------------------
    # most data is saved to hdf
    self.dataManager = GMDataManager()
    # the db that holds everything we want to work with
    self.dbFileName = dbFileName
    # the condition is only supplied at loading time
    self.condition = ""

    # --> NOTE: ALL of the arrays in this section are kept in sync;
    # --> each one holds information for an individual contig
    # indices into the data structure, based on the condition
    self.indices = np_array([])
    # coverage based coordinates
    self.covProfiles = np_array([])
    # the munged data points
    self.transformedCP = np_array([])
    # the corners of the transformed space
    self.corners = np_array([])
    # the centre of the coverage space
    self.TCentre = 0.0
    # distance from a corner to the centre of the transformed space
    self.transRadius = 0.0
    # average coverage across all stoits
    self.averageCoverages = np_array([])
    # norm of the raw coverage vectors
    self.normCoverages = np_array([])
    # raw kmer signatures
    self.kmerSigs = np_array([])
    # first PC of the kmer sigs, normalized to [0, 1]
    self.kmerNormPC1 = np_array([])
    # PCs of the kmer sigs capturing the specified variance
    self.kmerPCs = np_array([])
    # variance of each PC
    self.kmerVarPC = np_array([])
    self.stoitColNames = np_array([])
    self.contigNames = np_array([])
    self.contigLengths = np_array([])
    self.contigGCs = np_array([])
    self.colorMapGC = None
    # list of bin IDs
    self.binIds = np_array([])
    # --> end of the in-sync section

    # ---- meta ---------------------------------------------------------
    # valid bin ids -> numMembers
    self.validBinIds = {}
    # indicates whether a bin is likely to be chimeric
    self.isLikelyChimeric = {}
    # those indices which belong to some bin
    self.binnedRowIndices = {}
    # those indices which can not be binned yet
    self.restrictedRowIndices = {}
    # depends on the condition given
    self.numContigs = 0
    # depends on the data which was parsed
    self.numStoits = 0

    # ---- contig links -------------------------------------------------
    self.links = {}

    # ---- misc ---------------------------------------------------------
    # overwrite existing values silently?
    self.forceWriting = force
    # scale everything in the transformed data to this dimension
    self.scaleFactor = scaleFactor
def parseOptions(self, options):
    """Run the GroopM sub-command selected by ``options.subparser_name``.

    options -- parsed command line options. ``options.subparser_name``
               picks the branch to run; each branch reads only the
               attributes it needs (e.g. ``dbname``, ``bids``, ``cutoff``).

    Recognised sub-commands: parse, core, refine, recruit, extract, merge,
    split, delete, plot, explore, flyover, highlight, print and dump.
    An unrecognised sub-command is silently ignored.

    Returns 0 once the selected branch has run to completion. Returns
    None on the two early validation exits (fewer than 3 bam files in
    'parse' mode, or an unknown field name in 'dump' mode).

    Raises ExtractModeNotAppropriateException when 'extract' is asked for
    a mode other than 'contigs' or 'reads'.
    """
    # NOTE: every print below is written as print(<single string>), which
    # produces the same output whether print is the Python 2 statement or
    # the Python 3 function.
    timer = gtime.TimeKeeper()
    command = options.subparser_name

    def banner(message):
        """Print the three line start-up banner for the running mode."""
        stars = "*******************************************************************************"
        print(stars)
        print(" [[GroopM %s]] %s" % (self.GMVersion, message))
        print(stars)

    def chosen_bids():
        """Return the bin ids given by the user, or [] when none were given."""
        return [] if options.bids is None else options.bids

    if command == 'parse':
        # parse raw input and build the database
        banner("Running in data parsing mode...")
        # check this here:
        if len(options.bamfiles) < 3:
            print("Sorry, You must supply at least 3 bamFiles to use GroopM. "
                  "(You supplied %d)\n Exiting..." % len(options.bamfiles))
            return
        GMdata = mstore.GMDataManager()
        success = GMdata.createDB(options.bamfiles,
                                  options.reference,
                                  options.dbname,
                                  options.cutoff,
                                  timer,
                                  force=options.force,
                                  threads=options.threads)
        if not success:
            print("%s not updated" % (options.dbname,))

    elif command == 'core':
        # make bin cores
        banner("Running in core creation mode...")
        CE = cluster.ClusterEngine(options.dbname,
                                   timer,
                                   force=options.force,
                                   finalPlot=options.plot,
                                   plot=options.multiplot,
                                   minSize=options.size,
                                   minVol=options.bp)
        gf = "" if options.graphfile is None else options.graphfile
        CE.makeCores(coreCut=options.cutoff, gf=gf)

    elif command == 'refine':
        # refine bin cores
        banner("Running in core refining mode...")
        # options.bids is deliberately not consulted in this mode (the
        # lookup was disabled in the original code), so an empty list is
        # always handed to the engine.
        bids = []
        auto = options.auto
        transform = not options.no_transform
        RE = refine.RefineEngine(timer,
                                 dbFileName=options.dbname,
                                 transform=transform,
                                 bids=bids,
                                 loadContigNames=True)
        pfx = "REFINED" if options.plot else ""
        print("Refine bins")
        RE.refineBins(timer,
                      auto=auto,
                      saveBins=True,
                      plotFinal=pfx)

    elif command == 'recruit':
        # expand existing bins
        banner("Running in bin expansion mode...")
        RE = refine.RefineEngine(timer,
                                 dbFileName=options.dbname,
                                 getUnbinned=True,
                                 loadContigNames=False,
                                 cutOff=options.cutoff)
        RE.recruitWrapper(timer,
                          inclusivity=options.inclusivity,
                          step=options.step,
                          saveBins=True)

    elif command == 'extract':
        # extract contigs or reads
        banner("Running in '%s' extraction mode..." % options.mode)
        bids = chosen_bids()
        # the extractor is built before the mode is validated, exactly as
        # the original code did
        BX = groopmUtils.GMExtractor(options.dbname,
                                     bids=bids,
                                     folder=options.out_folder)
        if options.mode == 'contigs':
            BX.extractContigs(timer,
                              fasta=options.data,
                              prefix=options.prefix,
                              cutoff=options.cutoff)
        elif options.mode == 'reads':
            BX.extractReads(timer,
                            bams=options.data,
                            prefix=options.prefix,
                            mixBams=options.mix_bams,
                            mixGroups=options.mix_groups,
                            mixReads=options.mix_reads,
                            interleaved=options.interleave,
                            bigFile=options.no_gzip,
                            headersOnly=options.headers_only,
                            minMapQual=options.mapping_quality,
                            maxMisMatches=options.max_distance,
                            useSuppAlignments=options.use_supplementary,
                            useSecondaryAlignments=options.use_secondary,
                            verbose=options.verbose,
                            threads=options.threads)
        else:
            raise ExtractModeNotAppropriateException(
                "mode: " + options.mode + " is unknown")

    elif command == 'merge':
        # merge bins
        banner("Running in bin merging mode...")
        BM = binManager.BinManager(dbFileName=options.dbname)
        BM.loadBins(timer, makeBins=True, silent=False)
        BM.merge(options.bids, options.force, saveBins=True)

    elif command == 'split':
        # split a bin
        banner("Running in bin splitting mode...")
        BM = binManager.BinManager(dbFileName=options.dbname)
        BM.loadBins(timer, makeBins=True, silent=False)
        BM.split(options.bid,
                 options.parts,
                 mode=options.mode,
                 saveBins=True,
                 auto=options.force)

    elif command == 'delete':
        # delete bins
        banner("Running in bin deleting mode...")
        BM = binManager.BinManager(dbFileName=options.dbname)
        # all bins are loaded here; the ids to remove are only passed to
        # deleteBins
        BM.loadBins(timer, makeBins=True, silent=True)
        BM.deleteBins(options.bids,
                      force=options.force,
                      saveBins=True,
                      freeBinnedRowIndices=True)

    elif command == 'plot':
        # plot bins
        banner("Running in bin plotting mode...")
        BM = binManager.BinManager(dbFileName=options.dbname)
        bids = chosen_bids()
        BM.loadBins(timer,
                    makeBins=True,
                    silent=False,
                    bids=bids,
                    loadContigNames=False)
        BM.setColorMap(options.cm)
        BM.plotBins(FNPrefix=options.tag,
                    plotEllipsoid=True,
                    ignoreContigLengths=options.points,
                    folder=options.folder)

    elif command == 'explore':
        # interactive exploration plots
        banner("Running in bin '%s' explorer mode..." % options.mode)
        transform = not options.no_transform
        bids = chosen_bids()
        BE = groopmUtils.BinExplorer(options.dbname,
                                     bids=bids,
                                     transform=transform,
                                     cmstring=options.cm,
                                     ignoreContigLengths=options.points)
        if options.mode == 'binpoints':
            BE.plotPoints(timer)
        elif options.mode == 'binids':
            BE.plotIds(timer)
        elif options.mode == 'allcontigs':
            BE.plotContigs(timer, coreCut=options.cutoff, all=True)
        elif options.mode == 'unbinnedcontigs':
            BE.plotUnbinned(timer, coreCut=options.cutoff)
        elif options.mode == 'binnedcontigs':
            BE.plotContigs(timer, coreCut=options.cutoff)
        elif options.mode == 'binassignments':
            BE.plotBinAssignents(timer, coreCut=options.cutoff)
        elif options.mode == 'compare':
            BE.plotCompare(timer, coreCut=options.cutoff)
        elif options.mode == 'together':
            BE.plotTogether(timer,
                            coreCut=options.cutoff,
                            doMers=options.kmers)
        elif options.mode == 'sidebyside':
            BE.plotSideBySide(timer, coreCut=options.cutoff)
        else:
            # unlike 'extract', an unknown explore mode is only reported
            print("**Error: unknown mode: %s" % (options.mode,))

    elif command == 'flyover':
        # make a flyover
        banner("Making a flyover...")
        bids = chosen_bids()
        BE = groopmUtils.BinExplorer(options.dbname,
                                     bids=bids,
                                     transform=True,
                                     ignoreContigLengths=options.points)
        BE.plotFlyOver(timer,
                       fps=options.fps,
                       totalTime=options.totalTime,
                       percentFade=options.firstFade,
                       prefix=options.prefix,
                       showColorbar=options.colorbar,
                       title=options.title,
                       coreCut=options.cutoff,
                       format=options.format)

    elif command == 'highlight':
        # highlight selected bins
        banner("Running in highlighter mode...")
        bids = chosen_bids()
        BE = groopmUtils.BinExplorer(options.dbname,
                                     bids=bids,
                                     binLabelsFile=options.binlabels,
                                     contigColorsFile=options.contigcolors,
                                     ignoreContigLengths=options.points)
        BE.plotHighlights(timer,
                          options.elevation,
                          options.azimuth,
                          options.file,
                          options.filetype,
                          options.dpi,
                          drawRadius=options.radius,
                          show=options.show,
                          coreCut=options.cutoff,
                          testing=options.place)

    elif command == 'print':
        # print bin information; this mode shows no banner
        BM = binManager.BinManager(dbFileName=options.dbname)
        bids = chosen_bids()
        BM.loadBins(timer,
                    getUnbinned=options.unbinned,
                    makeBins=True,
                    silent=True,
                    bids=bids)
        BM.printBins(options.format, fileName=options.outfile)

    elif command == 'dump':
        # dump raw data from the database
        banner("Running in data dumping mode...")
        # prep fields. Do this first because users are most likely to
        # mess this part up!
        allowable_fields = ['names', 'mers', 'gc', 'coverage', 'tcoverage',
                            'ncoverage', 'lengths', 'bins', 'all']
        fields = options.fields.split(',')
        for field in fields:
            if field not in allowable_fields:
                print("ERROR: field '%s' not recognised. "
                      "Allowable fields are:" % field)
                # the Python 2 print statement put no space after the tab
                print("\t" + ",".join(allowable_fields))
                return
        # a literal backslash-t typed on the command line means a real tab
        if options.separator == '\\t':
            separator = '\t'
        else:
            separator = options.separator
        DM = GMDataManager()
        DM.dumpData(options.dbname,
                    fields,
                    options.outfile,
                    separator,
                    not options.no_headers)

    return 0