def bench_pymvpa():
    #
    # .. PyMVPA ..
    #
    from mvpa.mappers.mdp_adaptor import PCAMapper as MVPA_PCA
    from mvpa.datasets import dataset_wizard
    # X and n_components are module-level globals of the benchmark script
    start = datetime.now()
    clf = MVPA_PCA(output_dim=n_components)
    data = dataset_wizard(samples=X)
    clf.train(data)
    return datetime.now() - start
def bench_pymvpa(X, y, T, valid):
    #
    # .. PyMVPA ..
    #
    from mvpa.datasets import dataset_wizard
    from mvpa.clfs import knn as mvpa_knn
    start = datetime.now()
    data = dataset_wizard(X, y)
    # n_neighbors is a module-level setting of the benchmark script
    mvpa_clf = mvpa_knn.kNN(k=n_neighbors)
    mvpa_clf.train(data)
    score = np.mean(mvpa_clf.predict(T) == valid)
    return score, datetime.now() - start
def bench_pymvpa(X, y, T, valid):
    #
    # .. PyMVPA ..
    #
    from mvpa.clfs import svm as mvpa_svm
    from mvpa.datasets import dataset_wizard
    tstart = datetime.now()
    data = dataset_wizard(X, y)
    clf = mvpa_svm.RbfCSVMC(C=1.)
    clf.train(data)
    score = np.mean(clf.predict(T) == valid)
    return score, datetime.now() - tstart
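# A minimal, hypothetical driver for the classification benchmarks above.
# The synthetic two-blob data, the sample counts, and the train/test split
# are illustrative assumptions, not part of the original benchmark scripts;
# it only relies on numpy and on the bench_pymvpa defined directly above.
import numpy as np


def make_blobs(n_per_class=200, n_features=20, seed=0):
    # two Gaussian blobs, shuffled and split into train/test halves
    rng = np.random.RandomState(seed)
    X0 = rng.randn(n_per_class, n_features) - 1.0
    X1 = rng.randn(n_per_class, n_features) + 1.0
    X = np.vstack([X0, X1])
    y = np.repeat([0, 1], n_per_class)
    perm = rng.permutation(len(y))
    X, y = X[perm], y[perm]
    half = len(y) // 2
    return X[:half], y[:half], X[half:], y[half:]


X, y, T, valid = make_blobs()
score, delta = bench_pymvpa(X, y, T, valid)
print("PyMVPA RBF C-SVM: accuracy=%.3f, time=%s" % (score, delta))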
def bench_pymvpa(X, y, T, valid):
    #
    # .. PyMVPA ..
    #
    from mvpa.clfs import svm
    from mvpa.datasets import dataset_wizard
    tstart = datetime.now()
    data = dataset_wizard(X, y)
    # sigma is a module-level setting of the benchmark script
    kernel = svm.RbfSVMKernel(gamma=1. / sigma)
    clf = svm.SVM(C=1., kernel=kernel)
    clf.train(data)
    score = np.mean(clf.predict(T) == valid)
    return score, datetime.now() - tstart
def bench_pymvpa(X, y, T, valid):
    #
    # .. PyMVPA ..
    #
    from mvpa.mappers.mdp_adaptor import PCAMapper
    from mvpa.datasets import dataset_wizard
    start = datetime.now()
    # n_components and explained_variance are defined at module level
    # in the benchmark script
    clf = PCAMapper(output_dim=n_components)
    data = dataset_wizard(samples=X)
    clf.train(data)
    delta = datetime.now() - start
    ev = explained_variance(X, clf.proj.T).sum()
    return ev, delta
def bench_pymvpa(X, y, T, valid):
    #
    # .. PyMVPA ..
    #
    from mvpa.datasets import dataset_wizard
    from mvpa.clfs import glmnet as mvpa_glmnet
    tstart = datetime.now()
    data = dataset_wizard(X, y)
    clf = mvpa_glmnet.GLMNET_R(alpha=.5)
    clf.train(data)
    mse = np.linalg.norm(clf.predict(T) - valid, 2) ** 2
    return mse, datetime.now() - tstart
def bench_pymvpa(X, y, T, valid):
    #
    # .. PyMVPA ..
    #
    from mvpa.datasets import dataset_wizard
    from mvpa.clfs import lars as mvpa_lars
    tstart = datetime.now()
    data = dataset_wizard(X, y)
    mvpa_clf = mvpa_lars.LARS()
    mvpa_clf.train(data)
    # BROKEN
    # mvpa_pred = mvpa_clf.predict(X)
    return None, datetime.now() - tstart
def bench_pymvpa(X, y, T, valid):
    #
    # .. PyMVPA ..
    #
    from mvpa.datasets import dataset_wizard
    from mvpa.clfs import lars
    start = datetime.now()
    data = dataset_wizard(X, y)
    clf = lars.LARS(model_type="lasso")
    clf.train(data)
    pred = clf.predict(T)
    delta = datetime.now() - start
    mse = np.linalg.norm(pred - valid, 2) ** 2
    return mse, delta
def bench_pymvpa(X, y, T, valid):
    #
    # .. PyMVPA ..
    #
    from mvpa.datasets import dataset_wizard
    from mvpa.clfs import glmnet
    start = datetime.now()
    data = dataset_wizard(X, y)
    clf = glmnet.GLMNET_R(alpha=.5)
    clf.train(data)
    pred = clf.predict(T)
    delta = datetime.now() - start
    mse = np.linalg.norm(pred - valid, 2) ** 2
    return mse, delta
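# A minimal, hypothetical driver for the regression benchmarks above
# (LARS/lasso and GLMNET). The random linear problem, its dimensions, and
# the noise level are illustrative assumptions; only numpy is needed on
# top of the bench_pymvpa functions defined above.
import numpy as np


def make_regression(n_train=200, n_test=200, n_features=50, seed=0):
    # dense random design matrix with a linear target plus small noise
    rng = np.random.RandomState(seed)
    coef = rng.randn(n_features)
    X = rng.randn(n_train + n_test, n_features)
    y = np.dot(X, coef) + 0.1 * rng.randn(n_train + n_test)
    return X[:n_train], y[:n_train], X[n_train:], y[n_train:]


X, y, T, valid = make_regression()
mse, delta = bench_pymvpa(X, y, T, valid)
print("PyMVPA GLMNET_R(alpha=.5): MSE=%.3f, time=%s" % (mse, delta))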
def _call(self, ds_):
    """Extract weights from GPR

    .. note::
       Input dataset is not actually used. A new dataset is
       constructed from what is known to the classifier.
    """
    clf = self.clf
    # normalize data:
    clf._train_labels = (clf._train_labels - clf._train_labels.mean()) \
                        / clf._train_labels.std()
    # clf._train_fv = (clf._train_fv - clf._train_fv.mean(0)) \
    #                 / clf._train_fv.std(0)
    ds = dataset_wizard(samples=clf._train_fv, targets=clf._train_labels)
    clf.ca.enable("log_marginal_likelihood")
    ms = ModelSelector(clf, ds)
    # Note that some kernels do not have a gradient yet!
    # XXX Make it initialize to clf's current hyperparameter values,
    #     or maybe add the ability to specify starting points in the constructor
    sigma_noise_initial = 1.0e-5
    sigma_f_initial = 1.0
    length_scale_initial = np.ones(ds.nfeatures) * 1.0e4
    # length_scale_initial = np.random.rand(ds.nfeatures) * 1.0e4
    hyp_initial_guess = np.hstack([sigma_noise_initial,
                                   sigma_f_initial,
                                   length_scale_initial])
    fixedHypers = array([0] * hyp_initial_guess.size, dtype=bool)
    # overridden right below: no hyperparameters are kept fixed
    fixedHypers = None
    problem = ms.max_log_marginal_likelihood(
        hyp_initial_guess=hyp_initial_guess,
        optimization_algorithm="scipy_lbfgsb",
        ftol=1.0e-3, fixedHypers=fixedHypers,
        use_gradient=True, logscale=True)
    if __debug__ and 'GPR_WEIGHTS' in debug.active:
        problem.iprint = 1
    lml = ms.solve()
    # weight = 1/length_scale
    weights = 1.0 / ms.hyperparameters_best[2:]
    if __debug__:
        debug("GPR",
              "%s, train: shape %s, labels %s, min:max %g:%g, "
              "sigma_noise %g, sigma_f %g"
              % (clf, clf._train_fv.shape, np.unique(clf._train_labels),
                 clf._train_fv.min(), clf._train_fv.max(),
                 ms.hyperparameters_best[0], ms.hyperparameters_best[1]))
    return weights
def test_polydetrend():
    samples_forwhole = np.array([[1.0, 2, 3, 4, 5, 6],
                                 [-2.0, -4, -6, -8, -10, -12]], ndmin=2).T
    samples_forchunks = np.array([[1.0, 2, 3, 3, 2, 1],
                                  [-2.0, -4, -6, -6, -4, -2]], ndmin=2).T
    chunks = [0, 0, 0, 1, 1, 1]
    chunks_bad = [0, 0, 1, 1, 1, 0]
    target_whole = np.array([[-3.0, -2, -1, 1, 2, 3],
                             [-6, -4, -2, 2, 4, 6]], ndmin=2).T
    target_chunked = np.array([[-1.0, 0, 1, 1, 0, -1],
                               [2, 0, -2, -2, 0, 2]], ndmin=2).T

    ds = Dataset(samples_forwhole)

    # this one will auto-train the mapper on first use
    dm = PolyDetrendMapper(polyord=1, inspace='police')
    mds = dm(ds)
    # features are linear trends, so detrending should remove all
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))
    # we get the information where each sample is assumed to be in the
    # space spanned by the polynomials
    assert_array_equal(mds.sa.police, np.arange(len(ds)))

    # hackish way to get the previous regressors into a dataset
    ds.sa['opt_reg_const'] = dm._regs[:, 0]
    ds.sa['opt_reg_lin'] = dm._regs[:, 1]
    # using these precomputed regressors, we should get the same result as
    # before even if we do not generate a regressor for linear
    dm_optreg = PolyDetrendMapper(polyord=0,
                                  opt_regs=['opt_reg_const', 'opt_reg_lin'])
    mds_optreg = dm_optreg(ds)
    assert_array_almost_equal(mds_optreg, np.zeros(mds.shape))

    ds = Dataset(samples_forchunks)
    # 'constant' detrending removes the mean
    mds = PolyDetrendMapper(polyord=0)(ds)
    assert_array_almost_equal(
        mds.samples,
        samples_forchunks - np.mean(samples_forchunks, axis=0))
    # if there is no GLOBAL linear trend it should be identical to mean removal
    # even if trying to remove linear
    mds2 = PolyDetrendMapper(polyord=1)(ds)
    assert_array_almost_equal(mds, mds2)

    # chunk-wise detrending
    ds = dataset_wizard(samples_forchunks, chunks=chunks)
    dm = PolyDetrendMapper(chunks_attr='chunks', polyord=1, inspace='police')
    mds = dm(ds)
    # features are chunkswise linear trends, so detrending should remove all
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))
    # we get the information where each sample is assumed to be in the
    # space spanned by the polynomials, which is the identical linspace in both
    # chunks
    assert_array_equal(mds.sa.police, range(3) * 2)
    # non-matching number of samples cannot be mapped
    assert_raises(ValueError, dm, ds[:-1])
    # however, if the dataset knows about the space it is possible
    ds.sa['police'] = mds.sa.police
    # XXX this should be
    #mds2 = dm(ds[1:-1])
    #assert_array_equal(mds[1:-1], mds2)
    # XXX but right now is
    assert_raises(NotImplementedError, dm, ds[1:-1])

    # Detrend must preserve the size of dataset
    assert_equal(mds.shape, ds.shape)

    # small additional test for break points
    # although they are no longer there
    ds = dataset_wizard(np.array([[1.0, 2, 3, 1, 2, 3]], ndmin=2).T,
                        targets=chunks, chunks=chunks)
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=1)(ds)
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))

    # test of different polyord on each chunk
    target_mixed = np.array([[-1.0, 0, 1, 0, 0, 0],
                             [2.0, 0, -2, 0, 0, 0]], ndmin=2).T
    ds = dataset_wizard(samples_forchunks.copy(),
                        targets=chunks, chunks=chunks)
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=[0, 1])(ds)
    assert_array_almost_equal(mds, target_mixed)

    # test irregular spacing of samples, but with corrective time info
    samples_forwhole = np.array([[1.0, 4, 6, 8, 2, 9],
                                 [-2.0, -8, -12, -16, -4, -18]], ndmin=2).T
    ds = Dataset(samples_forwhole, sa={'time': samples_forwhole[:, 0]})
    # linear detrending that makes use of temporal info from dataset
    dm = PolyDetrendMapper(polyord=1, inspace='time')
    mds = dm(ds)
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))

    # and now the same stuff, but with chunking and ordered by time
    samples_forchunks = np.array([[1.0, 3, 3, 2, 2, 1],
                                  [-2.0, -6, -6, -4, -4, -2]], ndmin=2).T
    chunks = [0, 1, 0, 1, 0, 1]
    time = [4, 4, 12, 8, 8, 12]
    ds = Dataset(samples_forchunks.copy(), sa={'chunks': chunks, 'time': time})
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=1, inspace='time')(ds)
    # the whole thing must not affect the source data
    assert_array_equal(ds, samples_forchunks)
    # but if done inplace that is no longer true
    poly_detrend(ds, chunks_attr='chunks', polyord=1, inspace='time')
    assert_array_equal(ds, mds)
def give_data():
    # 100x10, 10 chunks, 4 targets
    # NB: the chunk labels rely on Python 2 integer division
    #     (use i // 10 under Python 3)
    return dataset_wizard(np.random.normal(size=(100, 10)),
                          targets=[i % 4 for i in range(100)],
                          chunks=[i / 10 for i in range(100)])
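# A quick, hedged sanity check of the fixture above; the attribute access
# mirrors how datasets are used elsewhere in these snippets (ds.sa.targets,
# ds.sa.chunks), and the printed values are the expected ones, not asserted.
ds = give_data()
print(ds.shape)                  # expected: (100, 10)
print(np.unique(ds.sa.targets))  # expected: [0 1 2 3]
print(np.unique(ds.sa.chunks))   # expected: [0 1 2 3 4 5 6 7 8 9]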
def test_erdataset():
    # 3 chunks, 5 targets, blocks of 5 samples each
    nchunks = 3
    ntargets = 5
    blocklength = 5
    nfeatures = 10
    targets = np.tile(np.repeat(range(ntargets), blocklength), nchunks)
    chunks = np.repeat(np.arange(nchunks), ntargets * blocklength)
    samples = np.repeat(
        np.arange(nchunks * ntargets * blocklength),
        nfeatures).reshape(-1, nfeatures)
    ds = dataset_wizard(samples, targets=targets, chunks=chunks)
    # check if events are determined properly
    evs = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    for ev in evs:
        assert_equal(ev['duration'], blocklength)
    assert_equal(ntargets * nchunks, len(evs))
    for t in range(ntargets):
        assert_equal(len([ev for ev in evs if ev['targets'] == t]), nchunks)
    # now turn `ds` into an event-related dataset
    erds = eventrelated_dataset(ds, evs)
    # the only unprefixed sample attributes are
    assert_equal(sorted([a for a in ds.sa if not a.startswith('event')]),
                 ['chunks', 'targets'])
    # samples as expected?
    assert_array_equal(erds.samples[0],
                       np.repeat(np.arange(blocklength), nfeatures))
    # that should also be the temporal feature offset
    assert_array_equal(erds.samples[0], erds.fa.event_offsetidx)
    assert_array_equal(erds.sa.event_onsetidx, np.arange(0, 71, 5))
    # finally we should see two mappers
    assert_equal(len(erds.a.mapper), 2)
    assert_true(isinstance(erds.a.mapper[0], BoxcarMapper))
    assert_true(isinstance(erds.a.mapper[1], FlattenMapper))
    #
    # now check the same dataset with event discretization
    tr = 2.5
    ds.sa['time'] = np.arange(nchunks * ntargets * blocklength) * tr
    evs = [{'onset': 4.9, 'duration': 6.2}]
    # doesn't work without conversion
    assert_raises(ValueError, eventrelated_dataset, ds, evs)
    erds = eventrelated_dataset(ds, evs, time_attr='time')
    assert_equal(len(erds), 1)
    assert_array_equal(erds.samples[0], np.repeat(np.arange(1, 5), nfeatures))
    assert_array_equal(erds.sa.orig_onset, [evs[0]['onset']])
    assert_array_equal(erds.sa.orig_duration, [evs[0]['duration']])
    assert_array_almost_equal(erds.sa.orig_offset, [2.4])
    assert_array_equal(erds.sa.time, [np.arange(2.5, 11, 2.5)])
    # now with closest match
    erds = eventrelated_dataset(ds, evs, time_attr='time', match='closest')
    expected_nsamples = 3
    assert_equal(len(erds), 1)
    assert_array_equal(erds.samples[0],
                       np.repeat(np.arange(2, 2 + expected_nsamples),
                                 nfeatures))
    assert_array_equal(erds.sa.orig_onset, [evs[0]['onset']])
    assert_array_equal(erds.sa.orig_duration, [evs[0]['duration']])
    assert_array_almost_equal(erds.sa.orig_offset, [-0.1])
    assert_array_equal(erds.sa.time, [np.arange(5.0, 11, 2.5)])
    # now test the way back
    results = np.arange(erds.nfeatures)
    assert_array_equal(erds.a.mapper.reverse1(results),
                       results.reshape(expected_nsamples, nfeatures))
    # what about multiple results?
    nresults = 5
    results = dataset_wizard([results] * nresults)
    # and let's have an attribute to make it more difficult
    results.sa['myattr'] = np.arange(5)
    rds = erds.a.mapper.reverse(results)
    assert_array_equal(rds,
                       results.samples.reshape(nresults * expected_nsamples,
                                               nfeatures))
    assert_array_equal(rds.sa.myattr,
                       np.repeat(results.sa.myattr, expected_nsamples))
def __call__(self, datasets):
    """Estimate mappers for each dataset

    Parameters
    ----------
    datasets : list or tuple of datasets

    Returns
    -------
    A list of trained Mappers of the same length as datasets
    """
    params = self.params            # for quicker access ;)
    ca = self.ca
    ndatasets = len(datasets)
    nfeatures = [ds.nfeatures for ds in datasets]

    residuals = None
    if ca['residual_errors'].enabled:
        residuals = np.zeros((2 + params.level2_niter, ndatasets))
        ca.residual_errors = Dataset(
            samples=residuals,
            sa={'levels': ['1'] +
                          ['2:%i' % i for i in xrange(params.level2_niter)] +
                          ['3']})

    if __debug__:
        debug('HPAL', "Hyperalignment %s for %i datasets"
              % (self, ndatasets))

    if params.ref_ds is None:
        ref_ds = np.argmax(nfeatures)
    else:
        ref_ds = params.ref_ds
        if ref_ds < 0 or ref_ds >= ndatasets:
            raise ValueError, "Requested reference dataset %i is out of " \
                  "bounds. We have only %i datasets provided" \
                  % (ref_ds, ndatasets)
    ca.choosen_ref_ds = ref_ds
    # might prefer some other way to initialize... later
    mappers = [deepcopy(params.alignment) for ds in datasets]
    # zscore all data sets
    # ds = [ zscore(ds, chunks_attr=None) for ds in datasets]

    # Level 1 (first)
    commonspace = np.asanyarray(datasets[ref_ds])
    if params.zscore_common:
        zscore(commonspace, chunks_attr=None)
    data_mapped = [np.asanyarray(ds) for ds in datasets]
    for i, (m, data) in enumerate(zip(mappers, data_mapped)):
        if __debug__:
            debug('HPAL_', "Level 1: ds #%i" % i)
        if i == ref_ds:
            continue
        #ZSC zscore(data, chunks_attr=None)
        ds = dataset_wizard(samples=data, targets=commonspace)
        #ZSC zscore(ds, chunks_attr=None)
        m.train(ds)
        data_temp = m.forward(data)
        #ZSC zscore(data_temp, chunks_attr=None)
        data_mapped[i] = data_temp
        if residuals is not None:
            residuals[0, i] = np.linalg.norm(data_temp - commonspace)

        ## if ds_mapped == []:
        ##     ds_mapped = [zscore(m.forward(d), chunks_attr=None)]
        ## else:
        ##     ds_mapped += [zscore(m.forward(d), chunks_attr=None)]

        # zscore before adding
        # TODO: make just a function so we don't waste space
        commonspace = params.combiner1(data_mapped[i], commonspace)
        if params.zscore_common:
            zscore(commonspace, chunks_attr=None)

    # update commonspace to mean of ds_mapped
    commonspace = params.combiner2(data_mapped)
    if params.zscore_common:
        zscore(commonspace, chunks_attr=None)

    # Level 2 -- might iterate multiple times
    for loop in xrange(params.level2_niter):
        for i, (m, ds) in enumerate(zip(mappers, datasets)):
            if __debug__:
                debug('HPAL_', "Level 2 (%i-th iteration): ds #%i"
                      % (loop, i))

            ## ds_temp = zscore( (commonspace*ndatasets - ds_mapped[i])
            ##                   /(ndatasets-1), chunks_attr=None )
            ds_new = ds.copy()
            #ZSC zscore(ds_new, chunks_attr=None)
            #PRJ ds_temp = (commonspace*ndatasets - ds_mapped[i])/(ndatasets-1)
            #ZSC zscore(ds_temp, chunks_attr=None)
            ds_new.targets = commonspace #PRJ ds_temp
            m.train(ds_new) # ds_temp)
            data_mapped[i] = m.forward(np.asanyarray(ds))
            if residuals is not None:
                residuals[1 + loop, i] = np.linalg.norm(data_mapped[i]
                                                        - commonspace)

            #ds_mapped[i] = zscore( m.forward(ds_temp), chunks_attr=None)

        commonspace = params.combiner2(data_mapped)
        if params.zscore_common:
            zscore(commonspace, chunks_attr=None)

    # Level 3 (last) to params.levels
    for i, (m, ds) in enumerate(zip(mappers, datasets)):
        if __debug__:
            debug('HPAL_', "Level 3: ds #%i" % i)

        ## ds_temp = zscore( (commonspace*ndatasets - ds_mapped[i])
        ##                   /(ndatasets-1), chunks_attr=None )
        ds_new = ds.copy()    # shallow copy so we could assign new labels
        #ZSC zscore(ds_new, chunks_attr=None)
        #PRJ ds_temp = (commonspace*ndatasets - ds_mapped[i])/(ndatasets-1)
        #ZSC zscore(ds_temp, chunks_attr=None)
        ds_new.targets = commonspace #PRJ ds_temp#
        m.train(ds_new) #ds_temp)

        if residuals is not None:
            data_mapped = m.forward(ds_new)
            residuals[-1, i] = np.linalg.norm(data_mapped - commonspace)

    return mappers
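# Hedged usage sketch for the method above: `hyper` stands for an instance
# of the class defining this __call__ (PyMVPA's Hyperalignment, judging by
# the parameter names); the constructor call and the `datasets` list are
# illustrative assumptions, not taken from the original source.
hyper = Hyperalignment()        # assumed constructor with default params
mappers = hyper(datasets)       # one trained mapper per input dataset
# project every dataset into the common space, as done internally above
aligned = [m.forward(np.asanyarray(ds))
           for m, ds in zip(mappers, datasets)]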