Exemplo n.º 1
0
def clustersamples(exp,minreads=0):
	"""
	reorder the samples of an experiment so that similar samples end up adjacent
	(single linkage hierarchical clustering on bray-curtis distances of the
	log2 transformed data)
	input:
	exp : Experiment
	minreads : int
		the minimal original number of reads per sample to keep it
	output:
	newexp : Experiment
		the filtered experiment, with the samples in the clustering leaf order
	"""
	# taken first so that only the call arguments are recorded for the command log
	params=locals()

	newexp=hs.filterorigreads(exp,minreads)

	# work on a transposed copy so the experiment data itself is untouched
	# (assumes data is stored bacteria x samples, giving one row per sample here - TODO confirm)
	logdat=np.transpose(copy.copy(newexp.data))
	# floor all values at 2 before moving to log2 scale
	logdat[logdat<=2]=2
	logdat=np.log2(logdat)

	# pairwise distances between the rows, then the leaf order of the single linkage tree
	distances=spatial.distance.pdist(logdat,metric='braycurtis')
	tree=cluster.hierarchy.single(distances)
	leaforder=cluster.hierarchy.leaves_list(tree)

	newexp=hs.reordersamples(newexp,leaforder)
	hs.addcommand(newexp,"clustersamples",params=params,replaceparams={'exp':exp})
	newexp.filters.append("cluster samples minreads=%d" % minreads)
	return newexp
Exemplo n.º 2
0
	def filterorigreads(self):
		"""
		ask the user for a minimal number of reads per sample, filter the selected
		experiment with hs.filterorigreads and hand the result to self.addexp
		(with '_for' appended to its study name).
		exactly one item must be selected in bMainList, otherwise a message is printed
		and nothing is done. nothing is added if the dialog is cancelled.
		"""
		selected=self.bMainList.selectedItems()
		if len(selected)!=1:
			print("Need 1 item")
			return
		# exactly one item is selected at this point
		curitem=selected[0]
		expname=str(curitem.text())
		curexp=self.explist[expname]
		# dialog arguments after the label: initial value 5000, allowed range 0..100000
		minreads,accepted=QtGui.QInputDialog.getInt(self,'Filter Original Reads','Minimal number of reads per sample',5000,0,100000)
		if not accepted:
			return
		filtered=hs.filterorigreads(curexp,minreads=minreads)
		filtered.studyname=filtered.studyname+'_for'
		self.addexp(filtered)
Exemplo n.º 3
0
def subsample(expdat,numreads=10000,inplace=False):
	"""
	subsample (rarify) reads from all samples in an experiment
	input:
	expdat : Experiment
	numreads : int
		number of reads to subsample to
		(also passed to hs.filterorigreads as the minimal number of reads per sample)
	inplace : bool
		true to replace current experiment (passed to hs.filterorigreads)

	output:
	newexp : Experiment
		the new subsampled experiment
	"""
	# snapshot the call arguments before anything else is bound locally, so the
	# parameters logged by hs.addcommand do not include the biom module
	params=locals()

	import biom

	newexp=hs.filterorigreads(expdat,numreads,inplace)
	newexp=hs.toorigreads(newexp,inplace=True)

	table=biom.table.Table(newexp.data,newexp.seqs,newexp.samples)
	table=table.subsample(numreads,axis='observation')
	# sanity check - the table samples are expected to match the experiment samples position by position
	for idx,cid in enumerate(table.ids(axis='sample')):
		if cid!=newexp.samples[idx]:
			print('problem with sample ids!!!!')
	# line up the experiment rows with the observations present in the subsampled table
	# before taking over the table counts
	newpos=[newexp.seqdict[cseq] for cseq in table.ids(axis='observation')]
	newexp=hs.reorderbacteria(newexp,newpos,inplace=True)
	newexp.data=table.matrix_data.todense().A
	# NOTE(review): normalizes to a fixed 10000 rather than to numreads - confirm this is intended
	newexp=normalizereads(newexp,numreads=10000,inplace=True,fixorig=False)
	# every sample is recorded as having numreads original reads
	for cidx in range(len(newexp.samples)):
		newexp.origreads[cidx]=numreads
	newexp=updateorigreads(newexp)
	newexp.filters.append("subsample to %d" % numreads)
	hs.addcommand(newexp,"subsample",params=params,replaceparams={'expdat':expdat})
	return newexp