def clusterbacteria(exp,minreads=0,uselog=True):
    """
    reorder the bacteria of an experiment so that bacteria with similar behavior are adjacent
    (single linkage hierarchical clustering on euclidean distances between the scaled rows)

    input:
    exp : Experiment
    minreads : int
        the minimal number of reads for a bacteria to be kept before clustering (to make faster)
    uselog : bool
        True to log2 transform the reads (after setting values <=2 to 2) before scaling,
        False to scale the reads as they are

    output:
    newexp : Experiment
        the filtered experiment, bacteria ordered by the clustering leaves order
    """
    params=locals()

    filtered=hs.filterminreads(exp,minreads,logit=False)

    # work on a copy of the data table so the experiment values are not modified
    profiles=copy.copy(filtered.data)
    if uselog:
        profiles[profiles<=2]=2
        profiles=np.log2(profiles)
    # scale each row (bacteria) - scale() is imported elsewhere in the file
    # NOTE(review): presumably sklearn.preprocessing.scale (zero mean / unit variance per row) - confirm
    profiles=scale(profiles,axis=1,copy=False)

    # cluster the bacteria and take the order of the dendrogram leaves
    distances=spatial.distance.pdist(profiles,metric='euclidean')
    linkage=cluster.hierarchy.single(distances)
    leaforder=cluster.hierarchy.leaves_list(linkage)

    newexp=hs.reorderbacteria(filtered,leaforder)
    hs.addcommand(newexp,"clusterbacteria",params=params,replaceparams={'exp':exp})
    newexp.filters.append("cluster bacteria minreads=%d" % minreads)
    return newexp
def filterminreads(exp,minreads,logit=True,useabs=False):
    """
    keep only bacteria whose reads, summed over all samples, are at least minreads

    input:
    exp : Experiment
    minreads : float
        the minimum number of reads total for all samples for a bacteria to be kept
    logit : bool
        True to add to the filters list and command log, False to not (if called from another heatsequer function)
    useabs : bool
        True to sum the absolute values of the reads, False to sum the reads themselves

    output:
    newexp - the filtered experiment
    """
    params=locals()

    reads=np.abs(exp.data) if useabs else exp.data
    totals=np.sum(reads,axis=1)
    keepidx=np.where(totals>=minreads)[0]

    newexp=hs.reorderbacteria(exp,keepidx)
    if logit:
        newexp.filters.append('filter min reads %d' % minreads)
        hs.addcommand(newexp,"filterminreads",params=params,replaceparams={'exp':exp})
    hs.Debug(6,'%d Bacteria left' % len(newexp.sids))
    return newexp
def sortbacteria(exp,inplace=False,logit=True):
    """
    reorder the bacteria according to the sort order of their taxonomy (exp.tax)

    input:
    exp : experiment
        the experiment to sort
    inplace : bool
        passed on to reorderbacteria: True to replace the current experiment, False to create a new one
    logit : bool
        True to add to the filters list and command log, False to skip (if called from other heatsequer function)

    output:
    newexp : experiment
        the experiment with bacteria ordered by taxonomy
    """
    params=locals()

    # hs.isort returns (sorted values, sort order); only the order is needed here
    sortedtax,order=hs.isort(exp.tax)
    newexp=hs.reorderbacteria(exp,order,inplace=inplace)
    if logit:
        newexp.filters.append('sorted bacteria by taxonomy')
        hs.addcommand(newexp,"sortbacteria",params=params,replaceparams={'exp':exp})
    return newexp
def sortbycentermass(expdat,field=False,numeric=True,uselog=True):
    """
    sort bacteria according to the center of mass of their reads along the sample axis

    the center of mass of a bacteria is sum(reads*sample position)/sum(reads), where the
    sample positions are taken after (optionally) sorting the samples by field

    input:
    expdat
    field : string
        the name of the field to sort the samples by or False to use the current sample order
    numeric : bool
        True if the sort field is numeric (ignored if no sort field)
    uselog : bool
        True to log2 transform the data (after setting values <1 to 1) before the calculation

    output:
    newexp - expdat with the bacteria reordered. Note it is built from expdat itself,
        so the samples keep the order they have in expdat
    """
    params=locals()

    if field:
        sortedexp=hs.sortsamples(expdat,field,numeric=numeric)
    else:
        sortedexp=hs.copyexp(expdat)

    dat=sortedexp.data
    if uselog:
        dat[dat<1]=1
        dat=np.log2(dat)

    positions=np.arange(len(sortedexp.samples))
    centers=[np.dot(dat[row,:],positions)/np.sum(dat[row,:]) for row in range(len(sortedexp.seqs))]

    sortedcenters,order=hs.isort(centers)
    newexp=hs.reorderbacteria(expdat,order)
    newexp.filters.append("sort by center of mass field=%s, uselog=%s" % (field,uselog))
    hs.addcommand(newexp,"sortbycentermass",params=params,replaceparams={'expdat':expdat})
    return newexp
def sortbyvariance(expdat,field=False,value=False,exact=False,norm=False):
    """
    sort bacteria by the spread of their reads (np.std over the samples)
    the spread is calculated on a subset of samples (field/value/exact) and then
    the whole experiment is reordered accordingly

    input:
    expdat : Experiment
    field : string
        name of the field used to select the samples or False to use all samples
    value : string
        value of the field for the samples to use
    exact : bool
        is the value exact or partial string
    norm : bool
        False to sort by the standard deviation, True to sort by standard deviation/mean

    output:
    newexp : Experiment
        the experiment with bacteria sorted according to the spread in the selected samples
    """
    params=locals()

    texp=hs.filtersamples(expdat,field,value,exact=exact) if field else copy.deepcopy(expdat)

    spread=np.std(texp.data,axis=1)
    if norm:
        spread=spread/np.mean(texp.data,axis=1)

    sortedspread,order=hs.isort(spread)
    newexp=hs.reorderbacteria(expdat,order)
    newexp.filters.append("sort by variance field=%s value=%s normalize=%s" % (field,value,norm))
    hs.addcommand(newexp,"sortbyvariance",params=params,replaceparams={'expdat':expdat})
    return newexp
def filterid(expdat,sids,exclude=False):
    """
    filter bacteria keeping only ones in sids

    input:
    expdat : Experiment
    sids : integer or iterable of integers
        the (hashed) sequence id(s). A single id, a list, tuple, set or numpy array are all accepted
    exclude : bool
        False to keep these bacteria, True to filter away

    output:
    newexp : Experiment
        the filtered experiment. The kept bacteria are in the same relative order as in expdat
    """
    params=locals()

    # a lone id (or a string) is wrapped into a list; any other iterable is used as a collection of ids
    if isinstance(sids,(str,bytes)) or not hasattr(sids,'__iter__'):
        sids=[sids]
    else:
        sids=list(sids)
    hs.Debug(1,'filter ids',sids)

    # set membership instead of scanning all the experiment ids once per requested id
    wanted=set(sids)
    matched=[idx for idx,tid in enumerate(expdat.sids) if tid in wanted]
    if exclude:
        matchedset=set(matched)
        keep=[idx for idx in range(len(expdat.sids)) if idx not in matchedset]
    else:
        keep=matched
    hs.Debug(1,'keep pos',keep)

    newexp=hs.reorderbacteria(expdat,keep)
    if exclude:
        newexp.filters.append('Filter %d ids (exclude)' % len(sids))
    else:
        newexp.filters.append('Filter %d ids' % len(sids))
    hs.addcommand(newexp,"filterid",params=params,replaceparams={'expdat':expdat})
    return newexp
def filterannotations(expdat,annotation,cdb=None,exclude=False):
    """
    filter keeping only bacteria which have annotation in one of their cooldb info entries

    input:
    expdat
    annotation - substring of the annotation (case insensitive)
    cdb - the database of cool sequences (from cooldb.load()) or None (default) to use the heatsequer loaded cdb (hs.cdb)
    exclude - False to keep matching bacteria, True to remove matching bacteria

    output:
    newexp - the filtered experiment
    """
    params=locals()

    if cdb is None:
        cdb=hs.cdb

    keeplist=[]
    for idx,cseq in enumerate(expdat.seqs):
        # a bacteria matches if any of its info entries contains the annotation
        matched=any(annotation.lower() in str(cinfo).lower() for cinfo in hs.cooldb.getseqinfo(cdb,cseq))
        if exclude:
            matched=not matched
        if matched:
            keeplist.append(idx)

    newexp=hs.reorderbacteria(expdat,keeplist)
    newexp.filters.append('Filter annotations %s' % annotation)
    hs.addcommand(newexp,"filterannotations",params=params,replaceparams={'expdat':expdat})
    hs.Debug(6,'%d bacteria found' % len(keeplist))
    return newexp
def filterknownbact(expdat,cdb=None,exclude=False):
    """
    filter keeping only bacteria which we know about in cooldb
    (bacteria for which hs.cooldb.getseqinfo returns at least one entry)

    input:
    expdat : Experiment
    cdb : cooldb
        the manual annotation database (from cooldb.loaddb) or None (default) to use hs.cdb
    exclude : bool
        True to throw away known bacteria, False to keep only them

    output:
    newexp : Experiment
        the filtered experiment. The kept bacteria are in the same relative order as in expdat
    """
    params=locals()

    if cdb is None:
        cdb=hs.cdb

    known=[idx for idx,cseq in enumerate(expdat.seqs) if len(hs.cooldb.getseqinfo(cdb,cseq))>0]
    hs.Debug(2,'Found %d sequences known in cooldb' % len(known))

    if exclude:
        # build an ordered list of positions (not a set) like the other filter functions do
        knownset=set(known)
        keep=[idx for idx in range(len(expdat.seqs)) if idx not in knownset]
        filtername='filter exclude cooldb known bacteria'
    else:
        keep=known
        filtername='filter cooldb known bacteria'

    newexp=hs.reorderbacteria(expdat,keep)
    # exactly one history entry, describing what was actually done
    newexp.filters.append(filtername)
    hs.Debug(6,'%d bacteria left' % len(newexp.sids))
    hs.addcommand(newexp,"filterknownbact",params=params,replaceparams={'expdat':expdat})
    return newexp
def bicluster(self):
    """
    open the bicluster window for the single experiment selected in the main list.
    if the dialog is accepted, create a new experiment ('<studyname>_bicluster') with the
    bacteria ordered by cexp.bactorder and the samples ordered by cexp.samporder, and add it
    using self.addexp()
    """
    items=self.bMainList.selectedItems()
    if len(items)!=1:
        print("Need 1 item")
        return

    cname=str(items[0].text())
    cexp=self.explist[cname]
    biclusterwin = BiClusterWindow(cexp,cdb=self.cooldb,bdb=self.bactdb)
    res=biclusterwin.exec_()
    if res!=QtGui.QDialog.Accepted:
        return

    # NOTE(review): bactorder/samporder are presumably stored on cexp by the bicluster window - confirm
    newexp=hs.reorderbacteria(cexp,cexp.bactorder)
    # the sample order must be applied to the samples axis (was applied to the bacteria)
    newexp=hs.reordersamples(newexp,cexp.samporder,inplace=True)
    newexp.studyname=newexp.studyname+'_bicluster'
    newexp.filters.append("bicluster")
    self.addexp(newexp)
def sortbysign(expdat,field=False,value='',exclude=False,exact=True,maxfval=0.2):
    """
    keep bacteria with a significant excess of positive or negative samples and sort them by mean sign
    for each bacteria the number of positive and negative samples is counted and a binomial (p=0.5)
    p-value is calculated for the smaller tail. Bacteria with hs.fdr() value <= maxfval are kept
    and sorted by the mean of the signs (nans ignored)

    input:
    expdat : Experiment
    field,value,exclude,exact : name of field and value of field in order to sort based only on these samples
        or field=False for all samples (default)
    maxfval - the maximal f-value

    output:
    newexp : Experiment
        the kept bacteria sorted by difference between positive/negative
    """
    params=locals()

    if field:
        texp=hs.filtersamples(expdat,field,value,exact=exact,exclude=exclude)
    else:
        texp=hs.copyexp(expdat)

    texp.data=np.sign(texp.data)
    numpos=np.nansum(texp.data>0,axis=1)
    numneg=np.nansum(texp.data<0,axis=1)
    total=numpos+numneg

    # bacteria without any positive/negative sample keep a p-value of 1
    pval=np.ones(len(numpos))
    tested=total>0
    lowpos=stats.binom.cdf(numpos[tested],total[tested],0.5)
    lowneg=stats.binom.cdf(numneg[tested],total[tested],0.5)
    pval[tested]=np.minimum(lowpos,lowneg)

    signs=np.nanmean(texp.data,axis=1)
    fval=hs.fdr(pval)
    keep=np.where(np.array(fval)<=maxfval)[0]

    # first keep the significant bacteria, then order them by their mean sign
    newexp=hs.reorderbacteria(expdat,keep)
    order=np.argsort(signs[keep])
    newexp=hs.reorderbacteria(newexp,order)
    newexp.filters.append("sort by sign field %s max-f-val %f" % (field,maxfval))
    hs.addcommand(newexp,"sortbysign",params=params,replaceparams={'expdat':expdat})
    return newexp
def filterfieldwave(expdat,field,val1,val2=False,mineffect=1,method='mean',uselog=True):
    """
    find all sequences which show an effect size of at least mineffect between val1 and val2 samples in field
    no statistical significance testing is performed
    the data of expdat is not modified (the transformations are done on a copy)

    input:
    expdat : Experiment
    field : string
        name of field to use for group separation
    val1 : string
        value in field for group1
    val2 : string
        value in field for group2 or False for all the other samples except val1
    mineffect : float
        min absolute difference between the group means per OTU in order to keep
    method: string
        'ranksum' to replace the values of each sequence by their ranks (stats.rankdata) before
        comparing the group means. Any other value (default 'mean') compares the means of the values
    uselog : bool
        True to log2 transform the data (after setting values <1 to 1)

    output:
    newexp : Experiment
        only with sequences showing a mineffect difference, sorted by the difference (group1-group2)
    """
    params=locals()

    numseqs=len(expdat.seqs)
    numsamples=len(expdat.samples)

    # private float copy: the flooring/ranking below must not leak into expdat (or into the output data)
    dat=np.array(expdat.data,dtype=float)
    if uselog:
        dat[dat<1]=1
        dat=np.log2(dat)
    if method=='ranksum':
        for idx in range(numseqs):
            dat[idx,:]=stats.rankdata(dat[idx,:])

    pos1=hs.findsamples(expdat,field,val1)
    if val2:
        pos2=hs.findsamples(expdat,field,val2)
    else:
        pos2=np.setdiff1d(np.arange(numsamples),pos1,assume_unique=True)

    outpos=[]
    odif=[]
    for idx in range(numseqs):
        cdif=np.mean(dat[idx,pos1])-np.mean(dat[idx,pos2])
        if abs(cdif)>=mineffect:
            outpos.append(idx)
            odif.append(cdif)

    si=np.argsort(odif)
    outpos=hs.reorder(outpos,si)
    newexp=hs.reorderbacteria(expdat,outpos)
    newexp.filters.append('filterfieldwave field %s val1 %s val2 %s' % (field,val1,val2))
    hs.addcommand(newexp,"filterfieldwave",params=params,replaceparams={'expdat':expdat})
    return newexp
def view(self):
    """
    plot self.cexp with the selected bacteria and samples moved to the beginning
    the bacteria in self.seqs (positions looked up in cexp.seqdict) are placed first, followed by
    the rest of the bacteria. The same is done for the sample positions in self.samples
    """
    cexp=self.cexp

    # samples: the selected positions and then all the others
    othersamples=np.setdiff1d(np.arange(len(cexp.samples)),self.samples)
    sampleorder=np.concatenate((self.samples,othersamples))

    # bacteria: the selected sequences and then all the others
    selectedbact=[cexp.seqdict[cseq] for cseq in self.seqs]
    otherbact=np.setdiff1d(np.arange(len(cexp.seqs)),selectedbact)
    bactorder=np.concatenate((selectedbact,otherbact))

    newexp=hs.reorderbacteria(cexp,bactorder)
    newexp=hs.reordersamples(newexp,sampleorder,inplace=True)
    hs.plotexp(newexp,seqdb=self.bactdb,sortby=False,numeric=False,usegui=True,cdb=self.cooldb,showline=False)
def filterseqs(expdat,seqs,exclude=False,subseq=False,logit=True):
    """
    filter sequences from the list seqs (keeping sequences appearing in the list)

    input:
    expdat : Experiment
    seqs : list of strings
        the (ACGT) sequences to keep
    exclude : bool
        True to filter away instead of keep, False to keep
    subseq : bool
        True - for each sequence in seqs take the first experiment sequence which contains it or is
        contained in it (the shorter of the two is searched in the longer). Slower.
        False - look only for exact match (via expdat.seqdict).
    logit : bool
        True to add to the filters list and command log, False to not log it (if called from other heatsequer function)

    output:
    newexp : Experiment
        the filtered experiment
    """
    params=locals()

    keeplist=[]
    for cseq in seqs:
        if not subseq:
            # exact match
            if cseq in expdat.seqdict:
                keeplist.append(expdat.seqdict[cseq])
            else:
                hs.Debug(7,'sequence not in experiment',cseq)
            continue
        # subsequence match - only the first matching experiment sequence is taken
        firsthit=next((idx for idx,coseq in enumerate(expdat.seqs)
                       if ((coseq in cseq) if len(cseq)>len(coseq) else (cseq in coseq))),None)
        if firsthit is not None:
            keeplist.append(firsthit)

    if exclude:
        keeplist=list(set(range(len(expdat.seqs))).difference(keeplist))

    newexp=hs.reorderbacteria(expdat,keeplist)
    if logit:
        newexp.filters.append('filter sequences')
        hs.addcommand(newexp,"filterseqs",params=params,replaceparams={'expdat':expdat})
    return newexp
def sortbyfreq(expdat,field=False,value=False,exact=False,exclude=False,logscale=True,useabs=False):
    """
    sort bacteria in experiment according to their mean frequency
    the mean is calculated on a subset of samples (field/value/exact/exclude) and then
    the whole experiment is reordered accordingly

    input:
    expdat : Experiment
    field : string
        name of the field used to select the samples or False to use all samples
    value : string
        value of the field for the samples to use
    exact : bool
        is the value exact or partial string
    exclude : bool
        True to sort on all samples except the field/value ones, False to sort only on field/value samples (default=False)
    logscale : bool
        True (default) to log2 transform the frequencies (after setting values <2 to 2) before taking the mean,
        False to use original values
    useabs : bool
        True to sort by the mean absolute value of freq, False (default) to sort by the mean freq

    output:
    newexp : Experiment
        the experiment with bacteria sorted according to subgroup freq.
    """
    params=locals()

    if field:
        texp=hs.filtersamples(expdat,field,value,exact=exact,exclude=exclude)
    else:
        texp=copy.deepcopy(expdat)

    if logscale:
        texp.data[texp.data<2]=2
        texp.data=np.log2(texp.data)

    freqs=np.abs(texp.data) if useabs else texp.data
    meanvals=np.mean(freqs,axis=1)

    sortedmeans,order=hs.isort(meanvals)
    newexp=hs.reorderbacteria(expdat,order)
    newexp.filters.append("sort by freq field=%s value=%s" % (field,value))
    hs.addcommand(newexp,"sortbyfreq",params=params,replaceparams={'expdat':expdat})
    return newexp
def filternans(expdat,minpresence):
    """
    filter an experiment containing nans in the table, keeping only bacteria with at least
    minpresence finite (non nan/inf) values

    input:
    expdat : Experiment
    minpresence: int
        minimal number of finite samples (keep only if >=)

    output:
    newexp : Experiment
        the filtered experiment
    """
    params=locals()

    numok=np.sum(np.isfinite(expdat.data),axis=1)
    keep=np.where(numok>=minpresence)[0]
    newexp=hs.reorderbacteria(expdat,keep)
    newexp.filters.append('filternans keep only with >=%d non nan reads' % minpresence)
    hs.addcommand(newexp,"filternans",params=params,replaceparams={'expdat':expdat})
    # report through the debug log (like the other filters) instead of a bare print
    hs.Debug(6,'%d Bacteria left' % len(keep))
    return newexp
def sortcorrelation(expdat,method='all'):
    """
    EXPERIMENTAL
    sort bacteria according to highest correlation/anti-correlation
    the order starts with a bacteria from the pair with the highest abs(correlation). Then, each time,
    the not yet used bacteria with the highest abs(correlation) to the last added bacteria is added

    input:
    expdat
    method:
        currently not used (the correlation is always calculated on all samples)

    output:
    newexp - the experiment with bacteria sorted by correlation (each time next bacteria the most abs(corr) to the current bacteria)
    """
    params=locals()

    # log2 transform a copy of the data (values <=2 set to 2) and scale each bacteria
    cdat=copy.copy(expdat.data)
    cdat[cdat<=2]=2
    cdat=np.log2(cdat)
    cdat=scale(cdat,axis=1,copy=False,with_mean=False)

    hs.Debug(6,"Calculating correlation matrix")
    # atleast_2d: np.corrcoef returns a scalar when there is only a single bacteria
    cmat=np.atleast_2d(np.corrcoef(cdat))
    hs.Debug(6,"sorting bacteria")
    cmat=np.abs(cmat)
    # remove the self correlation so a bacteria is never its own best match
    cmat-=np.identity(len(expdat.seqs))

    # start from (the row of) the strongest pair in the matrix
    current=np.unravel_index(np.argmax(cmat),np.shape(cmat))[0]
    order=[current]
    remaining=np.delete(np.arange(len(expdat.seqs)),current)
    while len(remaining)>0:
        # position (within remaining) of the best match to the current bacteria
        bestpos=np.argmax(cmat[remaining,current])
        # continue from the bacteria itself - not from its position inside remaining
        current=remaining[bestpos]
        order.append(current)
        remaining=np.delete(remaining,bestpos)

    newexp=hs.reorderbacteria(expdat,order)
    newexp.filters.append("correlation sort")
    hs.addcommand(newexp,"sortcorrelation",params=params,replaceparams={'expdat':expdat})
    return newexp
def filterminsamples(expdat,minsamples):
    """
    filter away bacteria present (reads>0) in less than minsamples samples

    input:
    expdat : Experiment
    minsamples : int
        the minimal number of samples where the bacteria appears

    output:
    newexp : Experiment
        the filtered experiment
    """
    params=locals()

    numsamples=np.sum(expdat.data>0,axis=1)
    keep=np.where(numsamples>=minsamples)
    newexp=hs.reorderbacteria(expdat,keep[0])
    newexp.filters.append('filter min samples %d' % minsamples)
    # log under this function's own name (was logged as "filterpresence")
    hs.addcommand(newexp,"filterminsamples",params=params,replaceparams={'expdat':expdat})
    hs.Debug(6,'%d Bacteria left' % len(newexp.sids))
    return newexp
def filterpresence(expdat,frac):
    """
    keep only bacteria present (reads>0) in at least frac of the samples

    input:
    expdat : Experiment
    frac : float
        the minimal fraction of samples to appear in for a bacteria to be kept

    output:
    newexp : Experiment
        the filtered experiment
    """
    params=locals()

    numpresent=np.sum(expdat.data>0,axis=1)
    # true division so the fraction is a float also for integer counts
    fracpresent=np.true_divide(numpresent,len(expdat.samples))
    keepidx=np.where(fracpresent>=frac)[0]

    newexp=hs.reorderbacteria(expdat,keepidx)
    newexp.filters.append('filter presence %f' % frac)
    hs.addcommand(newexp,"filterpresence",params=params,replaceparams={'expdat':expdat})
    hs.Debug(6,'%d Bacteria left' % len(newexp.sids))
    return newexp
def filtermean(expdat,meanval):
    """
    keep bacteria whose mean reads per sample is at least meanval times the mean total reads per sample

    input:
    expdat : Experiment
        the experiment
    meanval : float
        the minimum mean fraction of reads per sample (NOT out of 10k/sample) for a bacteria to be kept

    output:
    newexp : Experiment
        the filtered experiment
    """
    params=locals()

    # the threshold is relative to the mean (over samples) of the total reads in a sample
    meantotreads=np.mean(np.sum(expdat.data,0))
    threshold=meanval*meantotreads
    keepidx=np.where(np.mean(expdat.data,axis=1)>=threshold)[0]

    newexp=hs.reorderbacteria(expdat,keepidx)
    newexp.filters.append('filter mean reads %f' % meanval)
    hs.addcommand(newexp,"filtermean",params=params,replaceparams={'expdat':expdat})
    hs.Debug(6,'%d Bacteria left' % len(newexp.sids))
    return newexp
def filtertaxonomy(exp,tax,exact=False,exclude=False):
    """
    filter bacteria according to their taxonomy string (exp.tax)

    input:
    exp : Experiment
    tax : string
        the taxonomy name to filter by
    exact : bool
        True for exact matches to tax string, False for substring
    exclude : bool
        True to throw away matching taxonomy, False to keep matching

    output:
    newexp : Experiment
        the filtered experiment
    """
    params=locals()

    if exact:
        hits=[ctax==tax for ctax in exp.tax]
    else:
        hits=[tax in ctax for ctax in exp.tax]
    match=[cidx for cidx,hit in enumerate(hits) if (not hit if exclude else hit)]

    newexp=hs.reorderbacteria(exp,match)

    filt='filter taxonomy '+('exact match ' if exact else '')+('exclude ' if exclude else '')+tax
    newexp.filters.append(filt)
    hs.Debug(6,'%d bacteria left' % len(newexp.sids))
    hs.addcommand(newexp,"filtertaxonomy",params=params,replaceparams={'exp':exp})
    return newexp
def sortbygroupdiff(expdat,field,val1,val2):
    """
    sort bacteria in the experiment by the difference in the mean log2(reads+2) between
    the 2 groups (val1,val2 in field). The difference is divided by (sum of the 2 means + 20)

    input:
    expdat
    field - the name of the field for the 2 groups
    val1,val2 - the values for the 2 groups (exact match)

    output:
    newexp - the experiment with sorted bacteria
    """
    params=locals()

    groupmeans=[]
    for cval in (val1,val2):
        groupexp=hs.filtersamples(expdat,field,cval,exact=True)
        groupmeans.append(np.mean(np.log2(groupexp.data+2),axis=1))
    mean1,mean2=groupmeans
    diff=(mean1-mean2)/(mean1+mean2+20)

    sorteddiff,order=hs.isort(diff)
    newexp=hs.reorderbacteria(expdat,order)
    newexp.filters.append("sort by group difference field=%s val1=%s val2=%s" % (field,val1,val2))
    hs.addcommand(newexp,"sortbygroupdiff",params=params,replaceparams={'expdat':expdat})
    return newexp
def subsample(expdat,numreads=10000,inplace=False):
    """
    subsample (rarify) reads from all samples in an experiment
    the experiment is first filtered with hs.filterorigreads and converted with hs.toorigreads,
    the subsampling itself is done using a biom table

    input:
    expdat
    numreads - number of reads to subsample to
    inplace - true to replace current experiment

    output:
    newexp - the new subsampled experiment
    """
    # capture the call parameters before anything else is added to the local scope
    # (taking locals() after the import also recorded the biom module as a parameter)
    params=locals()
    import biom

    newexp=hs.filterorigreads(expdat,numreads,inplace)
    newexp=hs.toorigreads(newexp,inplace=True)

    table=biom.table.Table(newexp.data,newexp.seqs,newexp.samples)
    # NOTE(review): verify that axis='observation' is the intended axis for per-sample rarefaction in the biom version used
    table=table.subsample(numreads,axis='observation')
    # sanity check - the samples should come back in the same order
    tids=table.ids(axis='sample')
    for idx,cid in enumerate(tids):
        if not cid==newexp.samples[idx]:
            print('problem with sample ids!!!!')
    # keep only the sequences present in the subsampled table, in the table order
    newpos=[]
    for cseq in table.ids(axis='observation'):
        newpos.append(newexp.seqdict[cseq])
    newexp=hs.reorderbacteria(newexp,newpos,inplace=True)
    newexp.data=table.matrix_data.todense().A
    newexp=normalizereads(newexp,numreads=10000,inplace=True,fixorig=False)
    for cidx in range(len(newexp.samples)):
        newexp.origreads[cidx]=numreads
    newexp=updateorigreads(newexp)
    newexp.filters.append("subsample to %d" % numreads)
    hs.addcommand(newexp,"subsample",params=params,replaceparams={'expdat':expdat})
    return newexp
def filtern(expdat):
    """
    remove sequences containing "N" (or "n") from the experiment and renormalize (hs.normalizereads)

    input:
    expdat : Experiment

    output:
    newexp : Experiment
        experiment without sequences containing "N"
    """
    params=locals()

    keeplist=[idx for idx,cseq in enumerate(expdat.seqs) if "N" not in cseq and "n" not in cseq]

    newexp=hs.reorderbacteria(expdat,keeplist)
    newexp=hs.normalizereads(newexp)
    newexp.filters.append('Filter sequences containing N')
    hs.addcommand(newexp,"filtern",params=params,replaceparams={'expdat':expdat})
    hs.Debug(6,'%d sequences before filtering, %d after' % (len(expdat.seqs),len(newexp.seqs)))
    return newexp
def filterwave(expdat,field=False,numeric=True,minfold=2,minlen=3,step=1,direction='up',posloc='start'):
    """
    filter bacteria, keeping only ones that show a consecutive region (window) of samples with
    higher/lower mean than all the samples outside the window
    Done by scanning all window length/start position options for each bacteria.
    The comparison is done on log2 transformed data (values <1 are set to 1)

    input:
    expdat : Experiment
    field : string
        The field to sort the samples by or False to skip sorting
    numeric : bool
        For the sorting according to field (does not matter if field is False)
    minfold : float
        The minimal difference between the mean of the window and the mean of the rest in order to keep
        (compared to the difference of the log2 means)
    step : int
        The skip between tested window lengths (to make it faster use a larger skip)
    minlen : int
        The minimal window len for over/under expression testing
    direction : string
        'both' - test both over and under expression in the window
        'down' - only underexpressed
        any other value (default 'up') - only overexpressed
    posloc : string
        The position used for sorting the kept bacteria:
        'start' - beginning of the window with the maximal difference
        'mid' - middle of the window with the maximal difference
        'gstart' - beginning of the first window with >=minfold difference

    output:
    newexp : Experiment
        The filtered experiment (samples sorted by field if supplied), bacteria sorted according
        to the window position. False is returned if posloc is not supported
    """
    params=locals()

    # validate once, before scanning
    if posloc not in ('gstart','start','mid'):
        # NOTE(review): debug level 9 chosen for an error message - confirm the project convention
        hs.Debug(9,'posloc not supported %s' % posloc)
        return False

    # sort if needed
    if field:
        newexp=hs.sortsamples(expdat,field,numeric=numeric)
    else:
        newexp=hs.copyexp(expdat)

    # NOTE(review): the flooring below is done in place on newexp.data, so values <1 are also
    # set to 1 in the returned experiment (kept as in the original code)
    dat=newexp.data
    dat[dat<1]=1
    dat=np.log2(dat)

    numsamples=len(newexp.samples)
    numbact=len(newexp.seqs)
    maxdiff=np.zeros([numbact])
    maxpos=np.zeros([numbact])-1
    maxlen=np.zeros([numbact])
    # NOTE(review): with these ranges a window never includes the last sample (kept as in the original code)
    for startpos in range(numsamples-minlen):
        for cwin in np.arange(minlen,numsamples-startpos,step):
            endpos=startpos+cwin
            meanin=np.mean(dat[:,startpos:endpos],axis=1)
            # all the samples outside the window: the ones before it and the ones after it
            outside=np.concatenate([np.arange(startpos),np.arange(endpos,numsamples)])
            meanout=np.mean(dat[:,outside],axis=1)
            cdiff=meanin-meanout
            if direction=='both':
                cdiff=np.abs(cdiff)
            elif direction=='down':
                cdiff=-cdiff
            better=cdiff>maxdiff
            if posloc=='gstart':
                # only the first window passing the threshold sets the position
                usepos=np.logical_and(cdiff>=minfold,maxpos==-1)
                maxpos[usepos]=startpos
            elif posloc=='start':
                maxpos[better]=startpos
            else:
                maxpos[better]=startpos+int(cwin/2)
            maxlen[better]=cwin
            maxdiff=np.maximum(maxdiff,cdiff)

    keep=np.where(maxdiff>=minfold)[0]
    keep=keep[np.argsort(maxpos[keep])]
    for ci in keep:
        hs.Debug(6,'bacteria %s startpos %d len %d diff %f' % (newexp.tax[ci],maxpos[ci],maxlen[ci],maxdiff[ci]))
    newexp=hs.reorderbacteria(newexp,keep)
    newexp.filters.append('Filter wave field=%s minlen=%d' % (field,minlen))
    hs.addcommand(newexp,"filterwave",params=params,replaceparams={'expdat':expdat})
    hs.Debug(6,'%d bacteria before filtering, %d after' % (len(expdat.seqs),len(newexp.seqs)))
    return newexp