def test_identity_mapper(s):
    idm = IdentityMapper()
    # doesn't matter what you throw at it
    assert_true(idm.forward(s) is s)
    assert_true(idm.forward1(s) is s)
    assert_true(idm.reverse(s) is s)
    assert_true(idm.reverse1(s) is s)
    # even like this it should work, but type conversion can happen
    assert_array_equal(_verified_reverse1(idm, s), s)
    assert_array_equal(idm.reverse1(s), s)

def get_cluster_sizes(ds, cluster_counter=None):
    """Compute cluster sizes from all samples in a boolean dataset.

    Individually for each sample in the input dataset, clusters of non-zero
    values will be determined after reverse-applying any transformation of
    the dataset's mapper (if any).

    Parameters
    ----------
    ds : dataset or array
      A dataset with boolean samples.
    cluster_counter : Counter or None
      If not None, the given counter is updated with the cluster sizes
      computed from the present input dataset. Otherwise, a new counter
      is created.

    Returns
    -------
    Counter
      Histogram of cluster sizes (size -> number of occurrences) from all
      samples in the input dataset (merged with any values passed via
      ``cluster_counter``).
    """
    # XXX input needs to be boolean for the cluster size calculation to work
    if cluster_counter is None:
        cluster_counter = Counter()
    mapper = IdentityMapper()
    data = np.asanyarray(ds)
    if hasattr(ds, 'a') and 'mapper' in ds.a:
        mapper = ds.a.mapper
    for i in xrange(len(ds)):
        osamp = mapper.reverse1(data[i])
        m_clusters = _get_map_cluster_sizes(osamp)
        cluster_counter.update(m_clusters)
    return cluster_counter

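
# Hypothetical usage sketch (not part of the original module; the demo
# function name and sample data are made up): shows what
# get_cluster_sizes() returns for plain boolean arrays, and how the same
# per-map cluster sizes can be computed directly with scipy.ndimage, which
# is presumably what the _get_map_cluster_sizes() helper wraps. Note that
# newer scipy exposes these functions at the scipy.ndimage top level.
def _demo_get_cluster_sizes():
    import numpy as np
    from scipy.ndimage import measurements
    # two flattened boolean "samples"
    samples = np.array([[1, 1, 0, 0, 1],
                        [0, 1, 1, 1, 0]], dtype=bool)
    # equivalent per-sample computation with scipy.ndimage
    for samp in samples:
        labels, num = measurements.label(samp)
        sizes = measurements.sum(samp, labels,
                                 index=np.arange(1, num + 1)).astype(int)
        # first sample -> sizes [2, 1]; second sample -> sizes [3]
    # get_cluster_sizes(samples) pools all sizes into one histogram:
    # Counter({1: 1, 2: 1, 3: 1})
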
def _call(self, ds):
    if len(ds) > 1:
        # average all samples into one, assuming we got something like one
        # sample per subject as input
        avgr = mean_sample()
        ds = avgr(ds)
    # threshold input; at this point we only have one sample left
    thrd = ds.samples[0] > self._thrmap
    # mapper default
    mapper = IdentityMapper()
    # overwrite if possible
    if hasattr(ds, 'a') and 'mapper' in ds.a:
        mapper = ds.a.mapper
    # reverse-map input
    othrd = mapper.reverse1(thrd)
    osamp = mapper.reverse1(ds.samples[0])
    # prep output dataset
    outds = ds.copy(deep=False)
    outds.fa['featurewise_thresh'] = self._thrmap
    # determine clusters
    labels, num = measurements.label(othrd)
    area = measurements.sum(othrd, labels,
                            index=np.arange(1, num + 1)).astype(int)
    com = measurements.center_of_mass(
        osamp, labels=labels, index=np.arange(1, num + 1))
    maxpos = measurements.maximum_position(
        osamp, labels=labels, index=np.arange(1, num + 1))
    # for the rest we need the labels flattened
    labels = mapper.forward1(labels)
    # relabel clusters starting with the biggest and increase index with
    # decreasing size
    ordered_labels = np.zeros(labels.shape, dtype=int)
    ordered_area = np.zeros(area.shape, dtype=int)
    ordered_com = np.zeros((num, len(osamp.shape)), dtype=float)
    ordered_maxpos = np.zeros((num, len(osamp.shape)), dtype=float)
    for i, idx in enumerate(np.argsort(area)):
        ordered_labels[labels == idx + 1] = num - i
        # kinda ugly, but we are looping anyway
        ordered_area[i] = area[idx]
        ordered_com[i] = com[idx]
        ordered_maxpos[i] = maxpos[idx]
    labels = ordered_labels
    area = ordered_area[::-1]
    com = ordered_com[::-1]
    maxpos = ordered_maxpos[::-1]
    del ordered_labels  # this one can be big
    # store cluster labels after forward-mapping
    outds.fa['clusters_featurewise_thresh'] = labels.copy()
    # location info
    outds.a['clusterlocations'] = \
        np.rec.fromarrays(
            [com, maxpos], names=('center_of_mass', 'max'))
    # update cluster size histogram with the actual result to get a
    # proper lower bound for p-values
    # this will make a copy, because the original matrix is int
    cluster_probs_raw = _transform_to_pvals(
        area, self._null_cluster_sizes.astype('float'))
    clusterstats = (
        [area, cluster_probs_raw],
        ['size', 'prob_raw']
    )
    # evaluate a bunch of stats for all clusters
    morestats = {}
    for cid in xrange(len(area)):
        # keep clusters on outer loop, because selection is more expensive
        clvals = ds.samples[0, labels == cid + 1]
        for id_, fx in (('mean', np.mean),
                        ('median', np.median),
                        ('min', np.min),
                        ('max', np.max),
                        ('std', np.std)):
            stats = morestats.get(id_, [])
            stats.append(fx(clvals))
            morestats[id_] = stats
    for k, v in morestats.items():
        clusterstats[0].append(v)
        clusterstats[1].append(k)
    if self.params.multicomp_correction is not None:
        # do a local import as only this tiny portion needs statsmodels
        import statsmodels.stats.multitest as smm
        rej, probs_corr = smm.multipletests(
            cluster_probs_raw,
            alpha=self.params.fwe_rate,
            method=self.params.multicomp_correction)[:2]
        # store corrected per-cluster probabilities
        clusterstats[0].append(probs_corr)
        clusterstats[1].append('prob_corrected')
        # remove cluster labels that did not pass the FWE threshold
        for i, r in enumerate(rej):
            if not r:
                labels[labels == i + 1] = 0
        outds.fa['clusters_fwe_thresh'] = labels
    outds.a['clusterstats'] = \
        np.rec.fromarrays(clusterstats[0], names=clusterstats[1])
    return outds

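
# Illustration sketch (not part of the original module; name and data are
# made up): demonstrates the scipy.ndimage calls used in _call() above on a
# tiny 2-D map, i.e. how per-cluster sizes, intensity-weighted centers, and
# peak positions are obtained for the 'clusterstats' and 'clusterlocations'
# attributes.
def _demo_cluster_measurements():
    import numpy as np
    from scipy.ndimage import measurements
    # unthresholded map and its feature-wise thresholded version
    osamp = np.array([[0.0, 2.0, 0.0, 0.0],
                      [0.0, 3.0, 0.0, 5.0],
                      [0.0, 0.0, 0.0, 4.0]])
    othrd = osamp > 1.0
    labels, num = measurements.label(othrd)   # num == 2 clusters
    idx = np.arange(1, num + 1)
    area = measurements.sum(othrd, labels, index=idx).astype(int)  # [2, 2]
    com = measurements.center_of_mass(osamp, labels=labels, index=idx)
    maxpos = measurements.maximum_position(osamp, labels=labels, index=idx)
    # maxpos -> [(1, 1), (1, 3)], the locations of the 3.0 and 5.0 peaks
    return area, com, maxpos
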
def _call(self, ds):
    if len(ds) > 1:
        # average all samples into one, assuming we got something like one
        # sample per subject as input
        avgr = mean_sample()
        ds = avgr(ds)
    # threshold input; at this point we only have one sample left
    thrd = ds.samples[0] > self._thrmap
    # mapper default
    mapper = IdentityMapper()
    # overwrite if possible
    if hasattr(ds, 'a') and 'mapper' in ds.a:
        mapper = ds.a.mapper
    # reverse-map input
    osamp = mapper.reverse1(thrd)
    # prep output dataset
    outds = ds.copy(deep=False)
    outds.fa['featurewise_thresh'] = self._thrmap
    # determine clusters
    labels, num = measurements.label(osamp)
    area = measurements.sum(osamp, labels,
                            index=np.arange(1, num + 1)).astype(int)
    # for the rest we need the labels flattened
    labels = mapper.forward1(labels)
    # relabel clusters starting with the biggest and increase index with
    # decreasing size
    ordered_labels = np.zeros(labels.shape, dtype=int)
    ordered_area = np.zeros(area.shape, dtype=int)
    for i, idx in enumerate(np.argsort(area)):
        ordered_labels[labels == idx + 1] = num - i
        ordered_area[i] = area[idx]
    area = ordered_area[::-1]
    labels = ordered_labels
    del ordered_labels  # this one can be big
    # store cluster labels after forward-mapping
    outds.fa['clusters_featurewise_thresh'] = labels.copy()
    # update cluster size histogram with the actual result to get a
    # proper lower bound for p-values
    # this will make a copy, because the original matrix is int
    cluster_probs_raw = _transform_to_pvals(
        area, self._null_cluster_sizes.astype('float'))
    if self.params.multicomp_correction is None:
        probs_corr = np.array(cluster_probs_raw)
        rej = probs_corr <= self.params.fwe_rate
        outds.a['clusterstats'] = \
            np.rec.fromarrays(
                [area, cluster_probs_raw], names=('size', 'prob_raw'))
    else:
        # do a local import as only this tiny portion needs statsmodels
        import statsmodels.stats.multitest as smm
        rej, probs_corr = smm.multipletests(
            cluster_probs_raw,
            alpha=self.params.fwe_rate,
            method=self.params.multicomp_correction)[:2]
        # store corrected per-cluster probabilities
        outds.a['clusterstats'] = \
            np.rec.fromarrays(
                [area, cluster_probs_raw, probs_corr],
                names=('size', 'prob_raw', 'prob_corrected'))
    # remove cluster labels that did not pass the FWE threshold
    for i, r in enumerate(rej):
        if not r:
            labels[labels == i + 1] = 0
    outds.fa['clusters_fwe_thresh'] = labels
    return outds
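
# Illustration sketch (not part of the original module; function name and
# p-values are made up): shows the statsmodels call both _call() variants
# use for the multiple-comparison correction, and how its boolean rejection
# mask drives the zeroing of non-significant cluster labels.
def _demo_multicomp_correction(fwe_rate=0.05, method='fdr_bh'):
    import numpy as np
    import statsmodels.stats.multitest as smm
    cluster_probs_raw = np.array([0.001, 0.02, 0.04, 0.3])
    # multipletests() returns (reject, pvals_corrected, ...); only the
    # first two elements are used above
    rej, probs_corr = smm.multipletests(
        cluster_probs_raw, alpha=fwe_rate, method=method)[:2]
    # clusters with rej[i] == False would have their labels (i + 1) set
    # to 0, so only "significant" clusters survive in 'clusters_fwe_thresh'
    return rej, probs_corr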