def run(self):
    """Subsample two pickled samples and compute their pairwise dissimilarity.

    Loads the gzipped, pickled samples at ``self.sampleobjfile1`` and
    ``self.sampleobjfile2``, draws ``self.size`` sequences from each with
    ``iseqlib.samplingSample``, builds two aligned count vectors over the
    union of the clones seen in either subsample, and evaluates every index
    listed in ``self.options.similarityIndices`` with vegan's ``vegdist``
    (called through rpy2).  The filled ``PairSamplingStats`` object is
    written, gzipped and pickled, to ``<self.outdir>/<self.id>.pickle``.

    Raises:
        ValueError: if the two count vectors do not have the same length.
    """
    from contextlib import closing

    # Close the gzip handles explicitly instead of relying on garbage
    # collection to release them.
    with closing(gzip.open(self.sampleobjfile1, "rb")) as handle:
        sample1 = pickle.load(handle)
    with closing(gzip.open(self.sampleobjfile2, "rb")) as handle:
        sample2 = pickle.load(handle)

    # Progress line: sampling size, then name (file basename up to the first
    # '.') and total of each sample.  Formatted as one string so the same
    # text is printed under both Python 2 and Python 3.
    name1 = os.path.basename(self.sampleobjfile1).split('.')[0]
    name2 = os.path.basename(self.sampleobjfile2).split('.')[0]
    print("%s %s %s %s %s" % (self.size, name1, sample1.total,
                              name2, sample2.total))

    # Sampling "size" sequences from each sample
    subsample1 = iseqlib.samplingSample(sample1, self.size)
    subsample2 = iseqlib.samplingSample(sample2, self.size)

    stats = PairSamplingStats()  # statistics for the current pair of subsamples

    # Count vectors over the union of clones: position i of both vectors
    # refers to the same clone; a clone missing from one subsample gets 0.
    seqs1 = subsample1.seqs
    seqs2 = subsample2.seqs
    counts1 = []
    counts2 = []
    for header in seqs1:
        counts1.append(seqs1[header].count)
        if header in seqs2:
            counts2.append(seqs2[header].count)
        else:
            counts2.append(0)
    for header in seqs2:
        if header not in seqs1:
            counts1.append(0)
            counts2.append(seqs2[header].count)

    # Defensive check; the loops above append to both vectors in lockstep.
    if len(counts1) != len(counts2):
        raise ValueError("The count vectors of two compared samples must have "
                         "equal length. Found %d and %d\n"
                         % (len(counts1), len(counts2)))

    ## Initialize the R interface
    # NOTE(review): the init options are set on every call; presumably each
    # run() executes in its own process -- confirm.
    import rpy2.rinterface as rinterface
    rinterface.set_initoptions(('rpy2', '--no-save'))
    rinterface.initr()

    import rpy2.robjects as robjs
    from rpy2.robjects.packages import importr
    vegan = importr('vegan')
    vegdist = vegan.vegdist

    # Build a new list rather than extending counts1 in place through an alias.
    counts = counts1 + counts2
    rcountsVec = robjs.IntVector(counts)  # convert python list into R vector
    # Filled by row with nrow=2: row 1 holds counts1, row 2 holds counts2.
    rcountsMatrix = robjs.r['matrix'](rcountsVec, nrow=2, byrow=True)

    # Index names are passed to vegdist as `method`, e.g. "manhattan",
    # "euclidean", "canberra", "bray", "kulczynski", "jaccard", "gower",
    # "morisita", "horn", "mountford", "raup", "binomial" or "chao".
    for index in self.options.similarityIndices:
        disimilarity = vegdist(rcountsMatrix, method=index)
        stats[index] = disimilarity[0]

    # Write to temp file:
    picklefile = os.path.join(self.outdir, "%d.pickle" % self.id)
    with closing(gzip.open(picklefile, "wb")) as handle:
        pickle.dump(stats, handle)
def run(self):
    """Subsample a pickled sample, filter it by genes, and pickle the result.

    Loads the gzipped, pickled sample at ``self.samplefile`` and draws
    ``self.size`` sequences from it: with
    ``iseqlib.samplingSample_weightedUniq`` when ``self.options.uniq`` is
    set, otherwise with ``iseqlib.samplingSample``.  The subsample is then
    passed through ``iseqlib.filterSampleByGenes`` with ``self.options.vs``
    and ``self.options.js`` and written, gzipped and pickled, to
    ``self.outfile``.
    """
    from contextlib import closing

    # Close the gzip handles explicitly instead of relying on garbage
    # collection to release them.
    with closing(gzip.open(self.samplefile, "rb")) as handle:
        sample = pickle.load(handle)

    if self.options.uniq:
        subsample = iseqlib.samplingSample_weightedUniq(sample, self.size)
    else:
        subsample = iseqlib.samplingSample(sample, self.size)

    # Filtering if selected Vs and/or selected Js were specified
    subsample = iseqlib.filterSampleByGenes(subsample, self.options.vs,
                                            self.options.js)

    with closing(gzip.open(self.outfile, "wb")) as handle:
        pickle.dump(subsample, handle)
def run(self):
    """Subsample and filter a pickled sample, then schedule its analyses.

    Loads the gzipped, pickled sample at ``self.samplefile`` and draws
    ``self.size`` sequences from it: with
    ``iseqlib.samplingSample_weightedUniq`` when ``self.options.uniq`` is
    set, otherwise with ``iseqlib.samplingSample``.  The subsample is
    filtered with ``iseqlib.filterSampleByGenes`` using ``self.options.vs``
    and ``self.options.js``, written (gzipped and pickled) to
    ``<global temp dir>/<sample.name>.pickle``, and an ``Analyses`` child
    target is added for that file.
    """
    from contextlib import closing

    globalTempDir = self.getGlobalTempDir()

    # Sampling
    with closing(gzip.open(self.samplefile, "rb")) as handle:
        sample = pickle.load(handle)

    if self.options.uniq:
        # This used to pass the bare name `size`, which is not defined in
        # this method and raised NameError whenever the uniq option was on.
        subsample = iseqlib.samplingSample_weightedUniq(sample, self.size)
    else:
        subsample = iseqlib.samplingSample(sample, self.size)

    # Filtering if selected Vs and/or selected Js were specified
    subsample = iseqlib.filterSampleByGenes(subsample, self.options.vs,
                                            self.options.js)

    # Close the output explicitly so the pickle is completely written before
    # the child target that consumes it is registered.
    picklefile = os.path.join(globalTempDir, "%s.pickle" % sample.name)
    with closing(gzip.open(picklefile, "wb")) as handle:
        pickle.dump(subsample, handle)

    self.addChildTarget(Analyses(picklefile, self.outdir, self.options))
def run(self):
    """Subsample one pickled sample and compute its diversity statistics.

    Loads the gzipped, pickled sample at ``self.sampleobjfile``, draws
    ``self.size`` sequences with ``iseqlib.samplingSample`` and records in a
    ``SingleSamplingStats`` object:

    * ``uniqClones``: the number of entries in the subsample's ``seqs``;
    * one value per name in ``self.options.diversityIndices`` (except
      'uniqClones' and 'fisherAlpha'), computed by vegan's ``diversity``;
    * ``fisherAlpha``: computed by vegan's ``fisher.alpha``, always.

    The stats object is written, gzipped and pickled, to
    ``<self.outdir>/<self.id>.pickle``.
    """
    from contextlib import closing

    # Close the gzip handles explicitly instead of relying on garbage
    # collection to release them.
    with closing(gzip.open(self.sampleobjfile, "rb")) as handle:
        sample = pickle.load(handle)

    # Sampling "size" sequences from sample
    subsample = iseqlib.samplingSample(sample, self.size)

    stats = SingleSamplingStats()  # statistics for the current subsample

    # 1/ Number of uniq clones
    stats.uniqClones = len(subsample.seqs)

    # 2/ Diversity indices, computed from the list of clone sizes
    counts = [seq.count for seq in subsample.seqs.values()]

    ## Initialize the R interface
    # NOTE(review): the init options are set on every call; presumably each
    # run() executes in its own process -- confirm.
    import rpy2.rinterface as rinterface
    rinterface.set_initoptions(('rpy2', '--no-save'))
    rinterface.initr()

    import rpy2.robjects as robjs
    from rpy2.robjects.packages import importr
    vegan = importr('vegan')

    rcounts = robjs.IntVector(counts)  # convert python list into R vector

    ## Diversity indices, e.g. 'simpson', 'invsimpson', 'shannon'
    for index in self.options.diversityIndices:
        # These two are not vegan.diversity indices; they are filled in
        # separately (uniqClones above, fisherAlpha below).
        if index in ('uniqClones', 'fisherAlpha'):
            continue
        rval = vegan.diversity(rcounts, index)
        stats[index] = rval[0]

    ## Fisher Alpha
    rfisher = vegan.fisher_alpha(rcounts)
    stats.fisherAlpha = rfisher[0]

    # Write to temp file:
    picklefile = os.path.join(self.outdir, "%d.pickle" % self.id)
    with closing(gzip.open(picklefile, "wb")) as handle:
        pickle.dump(stats, handle)