def _run_core(self):
    """Run a cross-validated searchlight and save the resulting map.

    All inputs are read from ``self.inputs``:

    * ``attributes_file`` -- sample attributes (labels and chunks)
    * ``samples_file``    -- fMRI samples
    * ``mask_file``       -- mask applied while loading the samples
    * ``radius``          -- searchlight sphere radius

    The searchlight map is projected back with ``map2nifti`` and written to
    the path returned by ``self._get_output_filename()``.  Nothing is
    returned.
    """
    attributes = SampleAttributes(self.inputs.attributes_file)
    dataset = fmri_dataset(samples=self.inputs.samples_file,
                           labels=attributes.labels,
                           chunks=attributes.chunks,
                           mask=self.inputs.mask_file)

    # Drop the 'rest' samples (if there are any) before any processing.
    has_rest = 'rest' in dataset.uniquelabels
    if has_rest:
        dataset = dataset[dataset.sa.labels != 'rest']

    # Z-score in place as float32.
    # NOTE(review): 'rest' samples are already removed at this point, so
    # this is not a z-score relative to a 'rest' baseline -- confirm that
    # chunks_attr=True is the intended setting.
    zscore(dataset, chunks_attr=True, dtype='float32')

    # Measure evaluated in every sphere: transfer error of a linear C-SVM,
    # cross-validated with an N-fold splitter.
    classifier = LinearCSVMC()
    transfer_error = TransferError(classifier)
    splitter = NFoldSplitter()
    cross_validation = CrossValidatedTransferError(transfer_error, splitter)

    searchlight = sphere_searchlight(cross_validation,
                                     radius=self.inputs.radius,
                                     space='voxel_indices',
                                     nproc=2,
                                     mapper=mean_sample())

    # Shallow copy carrying only the attributes the searchlight is given.
    stripped = dataset.copy(deep=False,
                            sa=['labels', 'chunks'],
                            fa=['voxel_indices'],
                            a=[])
    searchlight_map = searchlight(stripped)

    # Project the result back into the original data space and store it.
    nifti_map = dataset.map2nifti(searchlight_map)
    nifti_map.save(self._get_output_filename())
def test_mapper_vs_zscore():
    """Check that ZScoreMapper reproduces the in-place ``zscore`` function.
    """
    # 40 samples x 20 features: every feature column is the ramp 0..39
    ramp = np.concatenate([np.arange(40) for _ in range(20)])
    ramp_ds = dataset_wizard(ramp.reshape(20, -1).T, targets=1, chunks=1)

    candidates = [ramp_ds]
    candidates.extend(datasets.values())

    for original in candidates:
        for_mapper = deepcopy(original)
        for_function = deepcopy(original)

        mapper = ZScoreMapper(chunks_attr=None)
        # forward() before train() has to fail
        assert_raises(RuntimeError, mapper.forward, for_mapper.samples)
        mapper.train(for_mapper)
        mapped = mapper.forward(for_mapper.samples)

        zscore(for_function, chunks_attr=None)

        # both routes give the same z-scores ...
        assert_array_almost_equal(mapped, for_function.samples)
        # ... and the mapper left its input samples untouched
        assert_array_equal(for_mapper.samples, original.samples)
def test_linear_svm_weights_per_class(self, svm):
    """Check per-class (split) linear SVM weights against the full weights.

    Parameters
    ----------
    svm
      Classifier under test; it must provide ``get_sensitivity_analyzer``.

    For a binary problem the split analyzer has to return one row per
    class, and the difference of the two rows has to reproduce the single
    row returned by the regular analyzer.  For a dataset the split
    analyzer cannot handle ('uni3medium') it has to raise
    NotImplementedError.
    """
    # assuming many defaults it is as simple as
    kwargs = dict(enable_ca=["sensitivities"])
    sana_split = svm.get_sensitivity_analyzer(
        split_weights=True, **kwargs)
    sana_full = svm.get_sensitivity_analyzer(
        force_train=False, **kwargs)

    # and lets look at all sensitivities
    ds2 = datasets['uni4large'].copy()
    # z-score with parameters estimated on the 'L2' and 'L3' samples ...
    zscore(ds2, param_est=('targets', ['L2', 'L3']))
    # ... then keep only the 'L0' and 'L1' samples for a binary task
    ds2 = ds2[np.logical_or(ds2.sa.targets == 'L0',
                            ds2.sa.targets == 'L1')]

    senssplit = sana_split(ds2)
    sensfull = sana_full(ds2)

    self.failUnlessEqual(senssplit.shape, (2, ds2.nfeatures))
    self.failUnlessEqual(sensfull.shape, (1, ds2.nfeatures))

    # just to verify that we split properly and if we reconstruct
    # manually we obtain the same
    dmap = (-1 * senssplit.samples[1] + senssplit.samples[0]) \
           - sensfull.samples
    self.failUnless((np.abs(dmap) <= 1e-10).all())

    # for now we can do split weights for binary tasks only, so
    # lets check if we raise a concern
    # we temporarily shutdown warning, since it is going to complain
    # otherwise, but we do it on purpose here
    handlers = warning.handlers
    warning.handlers = []
    try:
        self.failUnlessRaises(NotImplementedError,
                              sana_split, datasets['uni3medium'])
    finally:
        # reenable the warnings even if the check above fails, so that a
        # failure here does not silence warnings for the following tests
        warning.handlers = handlers
def test_linear_svm_weights_per_class(self, svm):
    """Check per-class (split) linear SVM weights against the full weights.

    Parameters
    ----------
    svm
      Classifier under test; it must provide ``get_sensitivity_analyzer``.

    For a binary problem the split analyzer has to return one row per
    class, and the difference of the two rows has to reproduce the single
    row returned by the regular analyzer.  For a dataset the split
    analyzer cannot handle ('uni3medium') it has to raise
    NotImplementedError.
    """
    # assuming many defaults it is as simple as
    kwargs = dict(enable_ca=["sensitivities"])
    sana_split = svm.get_sensitivity_analyzer(split_weights=True, **kwargs)
    sana_full = svm.get_sensitivity_analyzer(force_training=False, **kwargs)

    # and lets look at all sensitivities
    ds2 = datasets['uni4large'].copy()
    # z-score with parameters estimated on the 'L2' and 'L3' samples ...
    zscore(ds2, param_est=('targets', ['L2', 'L3']))
    # ... then keep only the 'L0' and 'L1' samples for a binary task
    ds2 = ds2[np.logical_or(ds2.sa.targets == 'L0',
                            ds2.sa.targets == 'L1')]

    senssplit = sana_split(ds2)
    sensfull = sana_full(ds2)

    self.failUnlessEqual(senssplit.shape, (2, ds2.nfeatures))
    self.failUnlessEqual(sensfull.shape, (1, ds2.nfeatures))

    # just to verify that we split properly and if we reconstruct
    # manually we obtain the same
    dmap = (-1 * senssplit.samples[1] + senssplit.samples[0]) \
           - sensfull.samples
    self.failUnless((np.abs(dmap) <= 1e-10).all())

    # for now we can do split weights for binary tasks only, so
    # lets check if we raise a concern
    # we temporarily shutdown warning, since it is going to complain
    # otherwise, but we do it on purpose here
    handlers = warning.handlers
    warning.handlers = []
    try:
        self.failUnlessRaises(NotImplementedError,
                              sana_split, datasets['uni3medium'])
    finally:
        # reenable the warnings even if the check above fails, so that a
        # failure here does not silence warnings for the following tests
        warning.handlers = handlers
def __call__(self, datasets):
    """Estimate mappers for each dataset

    The estimation runs in three levels:

    1. every dataset except the reference one is aligned to a common
       space that starts as the reference dataset and is updated with
       ``params.combiner1`` after each alignment;
    2. ``params.level2_niter`` times, every dataset is re-aligned to the
       current common space, which is then recomputed with
       ``params.combiner2``;
    3. every mapper is trained one last time against the final common
       space.

    Parameters
    ----------
    datasets : list or tuple of datasets

    Returns
    -------
    A list of trained Mappers of the same length as datasets

    Raises
    ------
    ValueError
      If ``params.ref_ds`` is given but is not a valid index into
      `datasets` (negative, or >= the number of datasets).

    Notes
    -----
    ``ca.choosen_ref_ds`` is set to the index of the reference dataset.
    If the ``residual_errors`` conditional attribute is enabled, it is
    assigned a Dataset with one row per level ('1', '2:<iter>', '3') and
    one column per input dataset.
    """
    params = self.params            # for quicker access ;)
    ca = self.ca
    ndatasets = len(datasets)
    nfeatures = [ds.nfeatures for ds in datasets]

    residuals = None
    if ca['residual_errors'].enabled:
        # rows: level 1, each level 2 iteration, level 3
        residuals = np.zeros((2 + params.level2_niter, ndatasets))
        ca.residual_errors = Dataset(
            samples=residuals,
            sa={'levels':
                ['1'] +
                ['2:%i' % i for i in xrange(params.level2_niter)] +
                ['3']})

    if __debug__:
        debug('HPAL', "Hyperalignment %s for %i datasets"
              % (self, ndatasets))

    if params.ref_ds is None:
        # default reference: the dataset with the most features
        ref_ds = np.argmax(nfeatures)
    else:
        ref_ds = params.ref_ds
        # either condition alone makes the index invalid ('and' could
        # never be true, so the check used to be dead code)
        if ref_ds < 0 or ref_ds >= ndatasets:
            raise ValueError(
                "Requested reference dataset %i is out of "
                "bounds. We have only %i datasets provided"
                % (ref_ds, ndatasets))
    ca.choosen_ref_ds = ref_ds

    # might prefer some other way to initialize... later
    mappers = [deepcopy(params.alignment) for ds in datasets]
    # zscore all data sets
    # ds = [ zscore(ds, chunks_attr=None) for ds in datasets]

    # Level 1 (first)
    # NOTE(review): np.asanyarray may hand back the reference dataset's
    # own sample array rather than a copy, in which case the in-place
    # zscore below would also alter the input -- TODO confirm.
    commonspace = np.asanyarray(datasets[ref_ds])
    if params.zscore_common:
        zscore(commonspace, chunks_attr=None)
    data_mapped = [np.asanyarray(ds) for ds in datasets]
    for i, (m, data) in enumerate(zip(mappers, data_mapped)):
        if __debug__:
            debug('HPAL_', "Level 1: ds #%i" % i)
        if i == ref_ds:
            # the reference dataset defines the initial common space
            continue
        #ZSC zscore(data, chunks_attr=None)
        ds = dataset_wizard(samples=data, targets=commonspace)
        #ZSC zscore(ds, chunks_attr=None)
        m.train(ds)
        data_temp = m.forward(data)
        #ZSC zscore(data_temp, chunks_attr=None)
        data_mapped[i] = data_temp

        if residuals is not None:
            residuals[0, i] = np.linalg.norm(data_temp - commonspace)

        # zscore before adding
        # TODO: make just a function so we dont' waste space
        commonspace = params.combiner1(data_mapped[i], commonspace)
        if params.zscore_common:
            zscore(commonspace, chunks_attr=None)

    # update commonspace to mean of ds_mapped
    commonspace = params.combiner2(data_mapped)
    if params.zscore_common:
        zscore(commonspace, chunks_attr=None)

    # Level 2 -- might iterate multiple times
    for loop in xrange(params.level2_niter):
        for i, (m, ds) in enumerate(zip(mappers, datasets)):
            if __debug__:
                debug('HPAL_', "Level 2 (%i-th iteration): ds #%i"
                      % (loop, i))

            ds_new = ds.copy()
            #ZSC zscore(ds_new, chunks_attr=None)
            #PRJ ds_temp = (commonspace*ndatasets - ds_mapped[i])/(ndatasets-1)
            #ZSC zscore(ds_temp, chunks_attr=None)
            ds_new.targets = commonspace #PRJ ds_temp
            m.train(ds_new) # ds_temp)
            data_mapped[i] = m.forward(np.asanyarray(ds))
            if residuals is not None:
                # residual of THIS dataset only; subtracting from the
                # whole data_mapped list mixed all datasets together
                residuals[1 + loop, i] = \
                    np.linalg.norm(data_mapped[i] - commonspace)

            #ds_mapped[i] = zscore( m.forward(ds_temp), chunks_attr=None)

        commonspace = params.combiner2(data_mapped)
        if params.zscore_common:
            zscore(commonspace, chunks_attr=None)

    # Level 3 (last) to params.levels
    for i, (m, ds) in enumerate(zip(mappers, datasets)):
        if __debug__:
            debug('HPAL_', "Level 3: ds #%i" % i)

        ds_new = ds.copy()     # shallow copy so we could assign new labels
        #ZSC zscore(ds_new, chunks_attr=None)
        #PRJ ds_temp = (commonspace*ndatasets - ds_mapped[i])/(ndatasets-1)
        #ZSC zscore(ds_temp, chunks_attr=None)
        ds_new.targets = commonspace #PRJ ds_temp#
        m.train(ds_new) #ds_temp)

        if residuals is not None:
            # separate name so the data_mapped list is not clobbered
            ds_mapped = m.forward(ds_new)
            residuals[-1, i] = np.linalg.norm(ds_mapped - commonspace)

    return mappers
def test_zscore():
    """Exercise ``zscore`` and ``ZScoreMapper`` on small hand-checked data.
    """
    # sixteen values with mean=2 and std=1, and their z-scores
    base_values = (0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)
    z_values = [-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0]

    samples = np.array(base_values).reshape((16, 1))
    expected = np.array(z_values, dtype='float64').reshape(16, 1)

    data = dataset_wizard(samples.copy(), targets=range(16),
                          chunks=[0] * 16)
    assert_equal(data.samples.mean(), 2.0)
    assert_equal(data.samples.std(), 1.0)

    # chunk-wise z-scoring (all samples share one chunk)
    zscore(data, chunks_attr='chunks')
    assert_array_equal(data.samples, expected)

    # z-scoring without any chunking gives the same result here
    data = dataset_wizard(samples.copy(), targets=range(16),
                          chunks=[0] * 16)
    zscore(data, chunks_attr=None)
    assert_array_equal(data.samples, expected)

    # parameters estimated only from the samples with targets 0 and 1
    # (values 0 and 2), which here amounts to a shift by one
    data = dataset_wizard(samples.copy(),
                          targets=[0, 2, 2, 2, 1] + [2] * 11,
                          chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples + 1.0)

    # zscore works in place -- guaranteed only when no upcasting is
    # needed, hence float samples that are handed over without a copy
    samples = samples.astype('float')
    data = dataset_wizard(samples,
                          targets=[0, 2, 2, 2, 1] + [2] * 11,
                          chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples)

    # ZScoreMapper -- partly repeats the above, but twice is better than
    # nothing
    raw = np.array(base_values)            # mean=2, std=1
    raw2 = np.array(base_values) + 10      # mean=12, std=1
    expected_col = np.transpose([z_values])

    ds = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)
    pristine = dataset_wizard(raw.copy(), targets=range(16),
                              chunks=[0] * 16)

    # default mapper on a dataset with a single chunk
    mapper = ZScoreMapper()
    mapper.train(ds)
    assert_array_almost_equal(mapper.forward(ds), expected_col)
    # the source dataset must stay unmodified
    assert_array_equal(pristine, ds)

    # explicitly given parameters win: a mean of 3 shifts everything by -1
    mapper = ZScoreMapper(params=(3,1))
    mapper.train(ds)
    assert_array_almost_equal(mapper.forward(ds), expected_col - 1)
    assert_array_equal(pristine, ds)

    # two chunks whose means differ by 10
    ds = dataset_wizard(np.hstack((raw.copy(), raw2.copy())),
                        targets=range(32),
                        chunks=[0] * 16 + [1] * 16)
    two_chunk_expected = np.transpose([z_values + z_values])

    # the default is chunk-wise, so both chunks end up with equal z-scores
    mapper = ZScoreMapper()
    mapper.train(ds)
    assert_array_almost_equal(mapper.forward(ds), two_chunk_expected)

    # the same result with per-chunk parameters provided by hand
    mapper = ZScoreMapper(params={0: (2,1), 1: (12,1)})
    mapper.train(ds)
    assert_array_almost_equal(mapper.forward(ds), two_chunk_expected)
def test_zscore():
    """Test z-scoring transformation

    Covers the ``zscore`` function on datasets and on plain ndarrays,
    estimation of the parameters from a subset of samples, in-place
    operation, and ``ZScoreMapper`` with default and explicitly given
    parameters.
    """
    # dataset: mean=2, std=1
    samples = np.array((0, 1, 3, 4, 2, 2, 3, 1,
                        1, 3, 3, 1, 2, 2, 2, 2)).reshape((16, 1))
    data = dataset_wizard(samples.copy(), targets=range(16),
                          chunks=[0] * 16)
    assert_equal(data.samples.mean(), 2.0)
    assert_equal(data.samples.std(), 1.0)
    data_samples = data.samples.copy()
    zscore(data, chunks_attr='chunks')

    # copy should stay intact
    assert_equal(data_samples.mean(), 2.0)
    assert_equal(data_samples.std(), 1.0)
    # we should be able to operate on ndarrays, but the type of an array
    # cannot be changed in place, so the integer copy has to be refused
    assert_raises(TypeError, zscore, data_samples, chunks_attr=None)
    # so lets do the conversion manually
    data_samples = data_samples.astype(float)
    zscore(data_samples, chunks_attr=None)
    assert_array_equal(data.samples, data_samples)

    # check z-scoring
    check = np.array([-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0],
                     dtype='float64').reshape(16, 1)
    assert_array_equal(data.samples, check)

    data = dataset_wizard(samples.copy(), targets=range(16),
                          chunks=[0] * 16)
    zscore(data, chunks_attr=None)
    assert_array_equal(data.samples, check)

    # check z-scoring taking set of labels as a baseline: only the
    # samples with targets 0 and 1 (values 0 and 2) are used for
    # estimation, which here amounts to a shift by one
    data = dataset_wizard(samples.copy(),
                          targets=[0, 2, 2, 2, 1] + [2] * 11,
                          chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples + 1.0)

    # check that zscore modifies in-place; only guaranteed if no upcasting
    # is necessary
    samples = samples.astype('float')
    data = dataset_wizard(samples,
                          targets=[0, 2, 2, 2, 1] + [2] * 11,
                          chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples)

    # these might be duplicating code above -- but twice is better than
    # nothing

    # dataset: mean=2, std=1
    raw = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2))
    # dataset: mean=12, std=1
    raw2 = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)) + 10
    # zscore target
    check = [-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0]

    ds = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)
    pristine = dataset_wizard(raw.copy(), targets=range(16),
                              chunks=[0] * 16)

    # default mapper on a dataset with a single chunk
    zm = ZScoreMapper()
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]))
    # should not modify the source
    assert_array_equal(pristine, ds)

    # if we tell it a different mean it should obey the order
    zm = ZScoreMapper(params=(3,1))
    zm.train(ds)
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]) - 1)
    assert_array_equal(pristine, ds)

    # let's look at chunk-wise z-scoring
    ds = dataset_wizard(np.hstack((raw.copy(), raw2.copy())),
                        targets=range(32),
                        chunks=[0] * 16 + [1] * 16)
    # by default chunk-wise
    zm = ZScoreMapper()
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds),
                              np.transpose([check + check]))
    # we should be able to do that same manually
    zm = ZScoreMapper(params={0: (2,1), 1: (12,1)})
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds),
                              np.transpose([check + check]))