class LARS(Classifier): """Least angle regression (LARS). LARS is the model selection algorithm from: Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani, Least Angle Regression Annals of Statistics (with discussion) (2004) 32(2), 407-499. A new method for variable subset selection, with the lasso and 'epsilon' forward stagewise methods as special cases. Similar to SMLR, it performs a feature selection while performing classification, but instead of starting with all features, it starts with none and adds them in, which is similar to boosting. This learner behaves more like a ridge regression in that it returns prediction values and it treats the training labels as continuous. In the true nature of the PyMVPA framework, this algorithm is actually implemented in R by Trevor Hastie and wrapped via RPy. To make use of LARS, you must have R and RPy installed as well as the LARS contributed package. You can install the R and RPy with the following command on Debian-based machines: sudo aptitude install python-rpy python-rpy-doc r-base-dev You can then install the LARS package by running R as root and calling: install.packages() """ # XXX from yoh: it is linear, isn't it? __tags__ = [ 'lars', 'regression', 'linear', 'has_sensitivity', 'does_feature_selection', 'rpy2' ] def __init__(self, model_type="lasso", trace=False, normalize=True, intercept=True, max_steps=None, use_Gram=False, **kwargs): """ Initialize LARS. See the help in R for further details on the following parameters: Parameters ---------- model_type : string Type of LARS to run. Can be one of ('lasso', 'lar', 'forward.stagewise', 'stepwise'). trace : boolean Whether to print progress in R as it works. normalize : boolean Whether to normalize the L2 Norm. intercept : boolean Whether to add a non-penalized intercept to the model. max_steps : None or int If not None, specify the total number of iterations to run. Each iteration adds a feature, but leaving it none will add until convergence. use_Gram : boolean Whether to compute the Gram matrix (this should be false if you have more features than samples.) """ # init base class first Classifier.__init__(self, **kwargs) if not model_type in known_models: raise ValueError('Unknown model %s for LARS is specified. Known' % model_type + 'are %s' % repr(known_models)) # set up the params self.__type = model_type self.__normalize = normalize self.__intercept = intercept self.__trace = trace self.__max_steps = max_steps self.__use_Gram = use_Gram # pylint friendly initializations self.__lowest_Cp_step = None self.__weights = None """The beta weights for each feature.""" self.__trained_model = None """The model object after training that will be used for predictions.""" def __repr__(self): """String summary of the object """ return "LARS(type='%s', normalize=%s, intercept=%s, trace=%s, " \ "max_steps=%s, use_Gram=%s, " \ "enable_ca=%s)" % \ (self.__type, self.__normalize, self.__intercept, self.__trace, self.__max_steps, self.__use_Gram, str(self.ca.enabled)) @due.dcite(Doi('10.1214/009053604000000067'), path="mvpa2.clfs.lars:LARS", description="Least angle regression", tags=["implementation"]) def _train(self, data): """Train the classifier using `data` (`Dataset`). 
""" targets = data.sa[self.get_space()].value[:, np.newaxis] # some non-Python friendly R-lars arguments lars_kwargs = {'use.Gram': self.__use_Gram} if self.__max_steps is not None: lars_kwargs['max.steps'] = self.__max_steps trained_model = r.lars(data.samples, targets, type=self.__type, normalize=self.__normalize, intercept=self.__intercept, trace=self.__trace, **lars_kwargs) #import pydb #pydb.debugger() # find the step with the lowest Cp (risk) # it is often the last step if you set a max_steps # must first convert dictionary to array Cp_vals = None try: Cp_vals = np.asanyarray(Rrx2(trained_model, 'Cp')) except TypeError as e: raise FailedToTrainError("Failed to train %s on %s. Got '%s' while trying to access " \ "trained model %s" % (self, data, e, trained_model)) if Cp_vals is None: # if there were no any -- just choose 0th lowest_Cp_step = 0 elif np.isnan(Cp_vals[0]): # sometimes may come back nan, so just pick the last one lowest_Cp_step = len(Cp_vals) - 1 else: # determine the lowest lowest_Cp_step = Cp_vals.argmin() self.__lowest_Cp_step = lowest_Cp_step # set the weights to the lowest Cp step self.__weights = np.asanyarray(Rrx2(trained_model, 'beta'))[lowest_Cp_step] self.__trained_model = trained_model # bind to an instance # # set the weights to the final state # self.__weights = self.__trained_model['beta'][-1,:] @accepts_dataset_as_samples def _predict(self, data): """ Predict the output for the provided data. """ # predict with the final state (i.e., the last step) # predict with the lowest Cp step try: res = r.predict(self.__trained_model, data, mode='step', s=self.__lowest_Cp_step) #s=self.__trained_model['beta'].shape[0]) fit = np.atleast_1d(Rrx2(res, 'fit')) except RRuntimeError as e: raise FailedToPredictError("Failed to predict on %s using %s. Exceptions was: %s" \ % (data, self, e)) self.ca.estimates = fit return fit def _init_internals(self): """Reinitialize all internals """ self.__lowest_Cp_step = None self.__weights = None """The beta weights for each feature.""" self.__trained_model = None """The model object after training that will be used for predictions.""" def _untrain(self): super(LARS, self)._untrain() self._init_internals() ##REF: Name was automagically refactored def _get_feature_ids(self): """Return ids of the used features """ return np.where(np.abs(self.__weights) > 0)[0] ##REF: Name was automagically refactored def get_sensitivity_analyzer(self, **kwargs): """Returns a sensitivity analyzer for LARS.""" return LARSWeights(self, **kwargs) weights = property(lambda self: self.__weights)
class RFE(IterativeFeatureSelection):
    """Recursive feature elimination.

    A `FeaturewiseMeasure` is used to compute sensitivity maps given a
    certain dataset. These sensitivity maps are in turn used to discard
    unimportant features. For each feature selection step, the transfer
    error on some test dataset is computed. This procedure is repeated until
    a given `StoppingCriterion` is reached.

    References
    ----------
    This strategy, introduced in

      Guyon, I., Weston, J., Barnhill, S., & Vapnik, V. (2002). Gene
      selection for cancer classification using support vector machines.
      Mach. Learn., 46(1-3), 389--422.

    was applied to SVM-based analysis of fMRI data in

      Hanson, S. J. & Halchenko, Y. O. (2008). Brain reading using full brain
      support vector machines for object recognition: there is no "face
      identification area". Neural Computation, 20, 486--503.

    Examples
    --------
    There are multiple possible ways to design an RFE. Here is one example
    which relies on a SplitClassifier to extract sensitivities and provide
    an estimate of performance (error)

    >>> # Lazy import
    >>> from mvpa2.suite import *
    >>> rfesvm_split = SplitClassifier(LinearCSVMC(), OddEvenPartitioner())
    >>> # design an RFE feature selection to be used with a classifier
    >>> rfe = RFE(rfesvm_split.get_sensitivity_analyzer(
    ...               # take sensitivities per each split, L2 norm, mean, abs them
    ...               postproc=ChainMapper([ FxMapper('features', l2_normed),
    ...                                      FxMapper('samples', np.mean),
    ...                                      FxMapper('samples', np.abs)])),
    ...           # use the error stored in the confusion matrix of split classifier
    ...           ConfusionBasedError(rfesvm_split, confusion_state='stats'),
    ...           # we just extract error from confusion, so no need to split dataset
    ...           Repeater(2),
    ...           # select 50% of the best on each step
    ...           fselector=FractionTailSelector(
    ...               0.50,
    ...               mode='select', tail='upper'),
    ...           # and stop whenever error didn't improve for up to 10 steps
    ...           stopping_criterion=NBackHistoryStopCrit(BestDetector(), 10),
    ...           # we just extract it from existing confusion
    ...           train_pmeasure=False,
    ...           # but we do want to update sensitivities on each step
    ...           update_sensitivity=True)
    >>> clf = FeatureSelectionClassifier(
    ...           LinearCSVMC(),
    ...           # on features selected via RFE
    ...           rfe,
    ...           # custom description
    ...           descr='LinSVM+RFE(splits_avg)' )

    Note: If you rely on cross-validation for the StoppingCriterion, make
    sure that you have at least 3 chunks so that SplitClassifier could have
    at least 2 chunks to split. Otherwise it cannot split any further (a
    single chunk cannot be split).

    """

    history = ConditionalAttribute(
        doc="Last step # when each feature was still present")
    sensitivities = ConditionalAttribute(
        enabled=False,
        doc="History of sensitivities (might consume too much memory)")

    def __init__(self,
                 fmeasure,
                 pmeasure,
                 splitter,
                 fselector=FractionTailSelector(0.05),
                 update_sensitivity=True,
                 nfeatures_min=0,
                 **kwargs):
        # XXX Allow for multiple stopping criteria, e.g. error not decreasing
        # anymore OR number of features less than threshold
        """Initialize recursive feature elimination

        Parameters
        ----------
        fmeasure : FeaturewiseMeasure
        pmeasure : Measure
          used to compute the transfer error of a classifier based on a
          certain feature set on the test dataset.
          NOTE: If the sensitivity analyzer is based on the same classifier
          that the transfer error measure uses, make sure you initialize the
          transfer error measure with train=False; otherwise it would
          needlessly train the classifier twice.
        splitter : Splitter
          This splitter instance has to generate at least two dataset splits
          when called with the input dataset.
The first split serves as the training dataset and the second as the evaluation dataset. fselector : Functor Given a sensitivity map it has to return the ids of those features that should be kept. update_sensitivity : bool If False the sensitivity map is only computed once and reused for each iteration. Otherwise the sensitivities are recomputed at each selection step. nfeatures_min : int Number of features for RFE to stop if reached. """ # bases init first IterativeFeatureSelection.__init__(self, fmeasure, pmeasure, splitter, fselector, **kwargs) self.__update_sensitivity = update_sensitivity """Flag whether sensitivity map is recomputed for each step.""" self._nfeatures_min = nfeatures_min def __repr__(self, prefixes=None): if prefixes is None: prefixes = [] return super(RFE, self).__repr__( prefixes=prefixes + _repr_attrs(self, ['update_sensitivity'], default=True)) @due.dcite(BibTeX(""" @Article{ GWB+02, author = "I. Guyon and J. Weston and S. Barnhill and V. Vapnik", title = "Gene Selection for Cancer Classification using Support Vector Machines", volume = "46", year = "2002", pages = "389--422", publisher = "Kluwer", address = "Hingham, MA, USA", journal = "Machine Learning" }"""), description="Recursive feature elimination procedure", tags=["implementation"]) @due.dcite(Doi("10.1162/neco.2007.09-06-340"), description="Full-brain fMRI decoding using SVM RFE", tags=["use"]) def _train(self, ds): """Proceed and select the features recursively eliminating less important ones. Parameters ---------- ds : Dataset used to compute sensitivity maps and train a classifier to determine the transfer error """ # get the initial split into train and test dataset, testdataset = self._get_traintest_ds(ds) if __debug__: debug('RFEC', "Initiating RFE with training on %s and testing using %s", (dataset, testdataset)) errors = [] """Computed error for each tested features set.""" ca = self.ca ca.nfeatures = [] """Number of features at each step. Since it is not used by the algorithm it is stored directly in the conditional attribute""" ca.history = np.arange(dataset.nfeatures) """Store the last step # when the feature was still present """ ca.sensitivities = [] stop = False """Flag when RFE should be stopped.""" results = None """Will hold the best feature set ever.""" wdataset = dataset """Operate on working dataset initially identical.""" wtestdataset = testdataset """Same feature selection has to be performs on test dataset as well. This will hold the current testdataset.""" step = 0 """Counter how many selection step where done.""" orig_feature_ids = np.arange(dataset.nfeatures) """List of feature Ids as per original dataset remaining at any given step""" sensitivity = None """Contains the latest sensitivity map.""" result_selected_ids = orig_feature_ids """Resultant ids of selected features. Since the best is not necessarily is the last - we better keep this one around. By default -- all features are there""" selected_ids = result_selected_ids isthebest = True """By default (e.g. 
no errors even estimated) every step is the best one """ while wdataset.nfeatures > 0: if __debug__: debug('RFEC', "Step %d: nfeatures=%d" % (step, wdataset.nfeatures)) # mark the features which are present at this step # if it brings anyb mentionable computational burden in the future, # only mark on removed features at each step ca.history[orig_feature_ids] = step # Compute sensitivity map if self.__update_sensitivity or sensitivity == None: sensitivity = self._fmeasure(wdataset) if len(sensitivity) > 1: raise ValueError( "RFE cannot handle multiple sensitivities at once. " "'%s' returned %i sensitivities." % (self._fmeasure.__class__.__name__, len(sensitivity))) if ca.is_enabled("sensitivities"): ca.sensitivities.append(sensitivity) if self._pmeasure: # get error for current feature set (handles optional retraining) error = np.asscalar( self._evaluate_pmeasure(wdataset, wtestdataset)) # Record the error errors.append(error) # Check if it is time to stop and if we got # the best result if self._stopping_criterion is not None: stop = self._stopping_criterion(errors) if self._bestdetector is not None: isthebest = self._bestdetector(errors) else: error = None nfeatures = wdataset.nfeatures if ca.is_enabled("nfeatures"): ca.nfeatures.append(wdataset.nfeatures) # store result if isthebest: result_selected_ids = orig_feature_ids if __debug__: debug( 'RFEC', "Step %d: nfeatures=%d error=%s best/stop=%d/%d " % (step, nfeatures, error, isthebest, stop)) # stop if it is time to finish if nfeatures == 1 or nfeatures <= self.nfeatures_min or stop: break # Select features to preserve selected_ids = self._fselector(sensitivity) if __debug__: debug( 'RFEC_', "Sensitivity: %s, nfeatures_selected=%d, selected_ids: %s" % (sensitivity, len(selected_ids), selected_ids)) # Create a dataset only with selected features wdataset = wdataset[:, selected_ids] # select corresponding sensitivity values if they are not # recomputed if not self.__update_sensitivity: if len(sensitivity.shape) >= 2: assert (sensitivity.shape[0] == 1 ) # there must be only 1 sample sensitivity = sensitivity[:, selected_ids] else: sensitivity = sensitivity[selected_ids] # need to update the test dataset as well # XXX why should it ever become None? # yoh: because we can have __transfer_error computed # using wdataset. See xia-generalization estimate # in lightsvm. 
Or for god's sake leave-one-out # on a wdataset # TODO: document these cases in this class if testdataset is not None: wtestdataset = wtestdataset[:, selected_ids] step += 1 # WARNING: THIS MUST BE THE LAST THING TO DO ON selected_ids selected_ids.sort() if self.ca.is_enabled("history") \ or self.ca.is_enabled('selected_ids'): orig_feature_ids = orig_feature_ids[selected_ids] # we already have the initial sensitivities, so even for a shared # classifier we can cleanup here if self._pmeasure: self._pmeasure.untrain() # charge conditional attributes self.ca.errors = errors self.ca.selected_ids = result_selected_ids if __debug__: debug('RFEC', "Selected %d features: %s", (len(result_selected_ids), result_selected_ids)) # announce desired features to the underlying slice mapper # do copy to survive later selections self._safe_assign_slicearg(copy(result_selected_ids)) # call super to set _Xshape etc super(RFE, self)._train(dataset) def _untrain(self): super(RFE, self)._untrain() if self._pmeasure: self._pmeasure.untrain() if self._fmeasure: self._fmeasure.untrain() def _get_nfeatures_min(self): return self._nfeatures_min def _set_nfeatures_min(self, v): if self.is_trained: self.untrain() if v < 0: raise ValueError("nfeatures_min must not be negative. Got %s" % v) self._nfeatures_min = v nfeatures_min = property(fget=_get_nfeatures_min, fset=_set_nfeatures_min) update_sensitivity = property(fget=lambda self: self.__update_sensitivity)
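# ---------------------------------------------------------------------------
# Illustrative sketch (added commentary, not part of the original module).
# It reuses the construction from the RFE class docstring above, but
# additionally demonstrates the ``nfeatures_min`` stopping condition and the
# cheaper variant that computes sensitivities only once
# (``update_sensitivity=False``).  All helper names are taken from the
# docstring example and are assumed to be importable from ``mvpa2.suite``.
def _rfe_usage_sketch():
    import numpy as np
    from mvpa2.suite import (SplitClassifier, LinearCSVMC, OddEvenPartitioner,
                             ChainMapper, FxMapper, l2_normed,
                             ConfusionBasedError, Repeater,
                             FractionTailSelector, NBackHistoryStopCrit,
                             BestDetector, FeatureSelectionClassifier)

    rfesvm_split = SplitClassifier(LinearCSVMC(), OddEvenPartitioner())
    rfe = RFE(rfesvm_split.get_sensitivity_analyzer(
                  postproc=ChainMapper([FxMapper('features', l2_normed),
                                        FxMapper('samples', np.mean),
                                        FxMapper('samples', np.abs)])),
              # error comes straight from the split classifier's confusion
              ConfusionBasedError(rfesvm_split, confusion_state='stats'),
              # no separate test split is needed for that error
              Repeater(2),
              # drop the worst half of the features on every step
              fselector=FractionTailSelector(0.50, mode='select', tail='upper'),
              stopping_criterion=NBackHistoryStopCrit(BestDetector(), 10),
              # never go below 16 features, even if the error keeps improving
              nfeatures_min=16,
              train_pmeasure=False,
              # compute sensitivities once and only subset them afterwards
              update_sensitivity=False)
    # wrap into a classifier that transparently applies the selection
    return FeatureSelectionClassifier(LinearCSVMC(), rfe,
                                      descr='LinSVM+RFE(splits, static sens)')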
class SMLR(Classifier): """Sparse Multinomial Logistic Regression `Classifier`. This is an implementation of the SMLR algorithm published in :ref:`Krishnapuram et al., 2005 <KCF+05>` (2005, IEEE Transactions on Pattern Analysis and Machine Intelligence). Be sure to cite that article if you use this classifier for your work. """ __tags__ = [ 'smlr', 'linear', 'has_sensitivity', 'binary', 'multiclass', 'oneclass', 'does_feature_selection', 'random_tie_breaking' ] # XXX: later 'kernel-based'? lm = Parameter( .1, constraints=EnsureFloat() & EnsureRange(min=1e-10), doc="""The penalty term lambda. Larger values will give rise to more sparsification.""") convergence_tol = Parameter( 1e-3, constraints=EnsureFloat() & EnsureRange(min=1e-10, max=1.0), doc="""When the weight change for each cycle drops below this value the regression is considered converged. Smaller values lead to tighter convergence.""") resamp_decay = Parameter( 0.5, constraints=EnsureFloat() & EnsureRange(min=0.0, max=1.0), doc="""Decay rate in the probability of resampling a zero weight. 1.0 will immediately decrease to the min_resamp from 1.0, 0.0 will never decrease from 1.0.""") min_resamp = Parameter( 0.001, constraints=EnsureFloat() & EnsureRange(min=1e-10, max=1.0), doc="Minimum resampling probability for zeroed weights") maxiter = Parameter( 10000, constraints=EnsureInt() & EnsureRange(min=1), doc="""Maximum number of iterations before stopping if not converged.""") has_bias = Parameter( True, constraints='bool', doc="""Whether to add a bias term to allow fits to data not through zero""") fit_all_weights = Parameter( True, constraints='bool', doc="""Whether to fit weights for all classes or to the number of classes minus one. Both should give nearly identical results, but if you set fit_all_weights to True it will take a little longer and yield weights that are fully analyzable for each class. Also, note that the convergence rate may be different, but convergence point is the same.""") implementation = Parameter(_DEFAULT_IMPLEMENTATION, constraints=EnsureChoice('C', 'Python'), doc="""Use C or Python as the implementation of stepwise_regression. C version brings significant speedup thus is the default one.""") ties = Parameter('random', constraints='str', doc="""Resolve ties which could occur. At the moment only obvious ties resulting in identical weights per two classes are detected and resolved randomly by injecting small amount of noise into the estimates of tied categories. Set to False to avoid this behavior""") seed = Parameter( _random_seed, constraints=EnsureNone() | EnsureInt(), doc="""Seed to be used to initialize random generator, might be used to replicate the run""") unsparsify = Parameter( False, constraints='bool', doc="""***EXPERIMENTAL*** Whether to unsparsify the weights via regression. Note that it likely leads to worse classifier performance, but more interpretable weights.""") std_to_keep = Parameter( 2.0, constraints='float', doc="""Standard deviation threshold of weights to keep when unsparsifying.""") def __init__(self, **kwargs): """Initialize an SMLR classifier. """ """ TODO: # Add in likelihood calculation # Add kernels, not just direct methods. """ # init base class first Classifier.__init__(self, **kwargs) if _cStepwiseRegression is None and self.params.implementation == 'C': warning('SMLR: C implementation is not available.' 
' Using pure Python one') self.params.implementation = 'Python' # pylint friendly initializations self._ulabels = None """Unigue labels from the training set.""" self.__weights_all = None """Contains all weights including bias values""" self.__weights = None """Just the weights, without the biases""" self.__biases = None """The biases, will remain none if has_bias is False""" ##REF: Name was automagically refactored def _python_stepwise_regression(self, w, X, XY, Xw, E, auto_corr, lambda_over_2_auto_corr, S, M, maxiter, convergence_tol, resamp_decay, min_resamp, verbose, seed=None): """The (much slower) python version of the stepwise regression. I'm keeping this around for now so that we can compare results.""" # get the data information into easy vars ns, nd = X.shape # initialize the iterative optimization converged = False incr = np.finfo(np.float).max non_zero, basis, m, wasted_basis, cycles = 0, 0, 0, 0, 0 sum2_w_diff, sum2_w_old, w_diff = 0.0, 0.0, 0.0 p_resamp = np.ones(w.shape, dtype=np.float) if seed is not None: # set the random seed np.random.seed(seed) if __debug__: debug("SMLR_", "random seed=%s" % seed) # perform the optimization while not converged and cycles < maxiter: # get the starting weight w_old = w[basis, m] # see if we're gonna update if (w_old != 0) or np.random.rand() < p_resamp[basis, m]: # let's do it # get the probability P = E[:, m] / S # set the gradient grad = XY[basis, m] - np.dot(X[:, basis], P) # calculate the new weight with the Laplacian prior w_new = w_old + grad / auto_corr[basis] # keep weights within bounds if w_new > lambda_over_2_auto_corr[basis]: w_new -= lambda_over_2_auto_corr[basis] changed = True # unmark from being zero if necessary if w_old == 0: non_zero += 1 # reset the prob of resampling p_resamp[basis, m] = 1.0 elif w_new < -lambda_over_2_auto_corr[basis]: w_new += lambda_over_2_auto_corr[basis] changed = True # unmark from being zero if necessary if w_old == 0: non_zero += 1 # reset the prob of resampling p_resamp[basis, m] = 1.0 else: # gonna zero it out w_new = 0.0 # decrease the p_resamp p_resamp[basis, m] -= (p_resamp[basis, m] - \ min_resamp) * resamp_decay # set number of non-zero if w_old == 0: changed = False wasted_basis += 1 else: changed = True non_zero -= 1 # process any changes if changed: #print "w[%d, %d] = %g" % (basis, m, w_new) # update the expected values w_diff = w_new - w_old Xw[:, m] = Xw[:, m] + X[:, basis] * w_diff E_new_m = np.exp(Xw[:, m]) S += E_new_m - E[:, m] E[:, m] = E_new_m # update the weight w[basis, m] = w_new # keep track of the sqrt sum squared diffs sum2_w_diff += w_diff * w_diff # add to the old no matter what sum2_w_old += w_old * w_old # update the class and basis m = np.mod(m + 1, w.shape[1]) if m == 0: # we completed a cycle of labels basis = np.mod(basis + 1, nd) if basis == 0: # we completed a cycle of features cycles += 1 # assess convergence incr = np.sqrt(sum2_w_diff) / \ (np.sqrt(sum2_w_old)+np.finfo(np.float).eps) # save the new weights converged = incr < convergence_tol if __debug__: debug("SMLR_", \ "cycle=%d ; incr=%g ; non_zero=%d ; " % (cycles, incr, non_zero) + "wasted_basis=%d ; " % (wasted_basis) + "sum2_w_old=%g ; sum2_w_diff=%g" % (sum2_w_old, sum2_w_diff)) # reset the sum diffs and wasted_basis sum2_w_diff = 0.0 sum2_w_old = 0.0 wasted_basis = 0 if not converged: raise ConvergenceError("More than %d Iterations without convergence" % \ (maxiter)) # calcualte the log likelihoods and posteriors for the training data #log_likelihood = x return cycles 
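    # (Added commentary, not original code.)  The stepwise loop in
    # ``_python_stepwise_regression`` above is a coordinate-wise optimizer
    # with a Laplacian (L1) prior: for each (feature ``basis``, class ``m``)
    # weight it computes the gradient of the multinomial log-likelihood,
    # ``grad = XY[basis, m] - dot(X[:, basis], P)``, takes a step scaled by
    # the precomputed curvature ``auto_corr[basis]``, and then
    # soft-thresholds the result at ``lambda / (2 * auto_corr[basis])``.
    # Steps that do not clear the threshold leave the weight at exactly
    # zero, which is where the sparsity of SMLR comes from.  Zeroed weights
    # are only revisited with probability ``p_resamp``, which decays by
    # ``resamp_decay`` towards ``min_resamp``, so later cycles spend most of
    # their time on the surviving (non-zero) weights.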
@due.dcite(Doi('10.1109/TPAMI.2005.127'), path="mvpa2.clfs.smlr:SMLR", description="Sparse multinomial-logistic regression classifier", tags=["implementation"]) def _train(self, dataset): """Train the classifier using `dataset` (`Dataset`). """ targets_sa_name = self.get_space() # name of targets sa targets_sa = dataset.sa[targets_sa_name] # actual targets sa # Process the labels to turn into 1 of N encoding uniquelabels = targets_sa.unique labels = _label2oneofm(targets_sa.value, uniquelabels) self._ulabels = uniquelabels.copy() Y = labels M = len(self._ulabels) # get the dataset information into easy vars X = dataset.samples # see if we are adding a bias term if self.params.has_bias: if __debug__: debug("SMLR_", "hstacking 1s for bias") # append the bias term to the features X = np.hstack((X, np.ones((X.shape[0], 1), dtype=X.dtype))) if self.params.implementation.upper() == 'C': _stepwise_regression = _cStepwiseRegression # # TODO: avoid copying to non-contig arrays, use strides in ctypes? if not (X.flags['C_CONTIGUOUS'] and X.flags['ALIGNED']): if __debug__: debug("SMLR_", "Copying data to get it C_CONTIGUOUS/ALIGNED") X = np.array(X, copy=True, dtype=np.double, order='C') # currently must be double for the C code if X.dtype != np.double: if __debug__: debug("SMLR_", "Converting data to double") # must cast to double X = X.astype(np.double) # set the feature dimensions elif self.params.implementation.upper() == 'PYTHON': _stepwise_regression = self._python_stepwise_regression else: raise ValueError( "Unknown implementation %s of stepwise_regression" % self.params.implementation) # set the feature dimensions ns, nd = X.shape # decide the size of weights based on num classes estimated if self.params.fit_all_weights: c_to_fit = M else: c_to_fit = M - 1 # Precompute what we can auto_corr = ((M - 1.) / (2. * M)) * (np.sum(X * X, 0)) XY = np.dot(X.T, Y[:, :c_to_fit]) lambda_over_2_auto_corr = (self.params.lm / 2.) / auto_corr # set starting values w = np.zeros((nd, c_to_fit), dtype=np.double) Xw = np.zeros((ns, c_to_fit), dtype=np.double) E = np.ones((ns, c_to_fit), dtype=np.double) S = M * np.ones(ns, dtype=np.double) # set verbosity if __debug__: verbosity = int("SMLR_" in debug.active) debug('SMLR_', 'Calling stepwise_regression. Seed %s' % self.params.seed) else: verbosity = 0 # call the chosen version of stepwise_regression cycles = _stepwise_regression( w, X, XY, Xw, E, auto_corr, lambda_over_2_auto_corr, S, M, self.params.maxiter, self.params.convergence_tol, self.params.resamp_decay, self.params.min_resamp, verbosity, self.params.seed) if cycles >= self.params.maxiter: # did not converge raise ConvergenceError( "More than %d iterations without convergence" % self.params.maxiter) # see if unsparsify the weights if self.params.unsparsify: # unsparsify w = self._unsparsify_weights(X, w) # resolve ties if present self.__ties = None if self.params.ties: if self.params.ties == 'random': # check if there is a tie showing itself as absent # difference between two w's wdot = np.dot(w.T, -w) ties = np.where(np.max(np.abs(wdot), axis=0) == 0)[0] if len(ties): warning("SMLR: detected ties in categories %s. 
Small " "amount of noise will be injected into result " "estimates upon prediction to break the ties" % self._ulabels[ties]) self.__ties = ties ## w_non0 = np.nonzero(w) ## w_non0_max = np.max(np.abs(w[w_non0])) ## w_non0_idx = np.unique(w_non0[0]) ## w_non0_len = len(w_non0_idx) ## for f_idx in np.where(ties)[0]: ## w[w_non0_idx, f_idx] += \ ## 0.001 * np.random.normal(size=(w_non0_len,)) else: raise ValueError("Do not know how to treat ties=%r" % (self.params.ties, )) # save the weights self.__weights_all = w self.__weights = w[:dataset.nfeatures, :] if self.ca.is_enabled('feature_ids'): self.ca.feature_ids = np.where( np.max(np.abs(w[:dataset.nfeatures, :]), axis=1) > 0)[0] # and a bias if self.params.has_bias: self.__biases = w[-1, :] if __debug__: debug( 'SMLR', "train finished in %d cycles on data.shape=%s " % (cycles, X.shape) + "min:max(data)=%f:%f, got min:max(w)=%f:%f" % (np.min(X), np.max(X), np.min(w), np.max(w))) def _unsparsify_weights(self, samples, weights): """Unsparsify weights via least squares regression.""" # allocate for the new weights new_weights = np.zeros(weights.shape, dtype=np.double) # get the sample data we're predicting and the sum squared # total variance b = samples sst = np.power(b - b.mean(0), 2).sum(0) # loop over each column for i in range(weights.shape[1]): w = weights[:, i] # get the nonzero ind ind = w != 0 # get the features with non-zero weights a = b[:, ind] # predict all the data with the non-zero features betas = np.linalg.lstsq(a, b)[0] # determine the R^2 for each feature based on the sum # squared prediction error f = np.dot(a, betas) sse = np.power((b - f), 2).sum(0) rsquare = np.zeros(sse.shape, dtype=sse.dtype) gind = sst > 0 rsquare[gind] = 1 - (sse[gind] / sst[gind]) # derrive new weights by combining the betas and weights # scaled by the rsquare new_weights[:, i] = np.dot(w[ind], betas) * rsquare # take the tails tozero = np.abs( new_weights) < self.params.std_to_keep * np.std(new_weights) orig_zero = weights == 0.0 if orig_zero.sum() < tozero.sum(): # should not end up with fewer than start tozero = orig_zero new_weights[tozero] = 0.0 if __debug__: debug( 'SMLR_', "Start nonzero: %d; Finish nonzero: %d" % ((weights != 0).sum(), (new_weights != 0).sum())) return new_weights ##REF: Name was automagically refactored def _get_feature_ids(self): """Return ids of the used features """ return np.where(np.max(np.abs(self.__weights), axis=1) > 0)[0] @accepts_dataset_as_samples def _predict(self, data): """Predict the output for the provided data. 
""" # see if we are adding a bias term if self.params.has_bias: # append the bias term to the features data = np.hstack( (data, np.ones((data.shape[0], 1), dtype=data.dtype))) # append the zeros column to the weights if necessary if self.params.fit_all_weights: w = self.__weights_all else: w = np.hstack( (self.__weights_all, np.zeros( (self.__weights_all.shape[0], 1)))) # determine the probability values for making the prediction dot_prod = np.dot(data, w) E = np.exp(dot_prod) if self.__ties is not None: # 1e-5 should be adequate since anyways this is done # already after exponentiation E[:, self.__ties] += \ 1e-5 * np.random.normal(size=(len(E), len(self.__ties))) S = np.sum(E, 1) if __debug__: debug( 'SMLR', "predict on data.shape=%s min:max(data)=%f:%f " % (repr(data.shape), np.min(data), np.max(data)) + "min:max(w)=%f:%f min:max(dot_prod)=%f:%f min:max(E)=%f:%f" % (np.min(w), np.max(w), np.min(dot_prod), np.max(dot_prod), np.min(E), np.max(E))) values = E / S[:, np.newaxis] #.repeat(E.shape[1], axis=1) self.ca.estimates = values # generate predictions predictions = np.asarray( [self._ulabels[np.argmax(vals)] for vals in values]) # no need to assign conditional attribute here -- would be done # in Classifier._postpredict anyway #self.predictions = predictions return predictions ##REF: Name was automagically refactored def get_sensitivity_analyzer(self, **kwargs): """Returns a sensitivity analyzer for SMLR.""" return SMLRWeights(self, **kwargs) biases = property(lambda self: self.__biases) weights = property(lambda self: self.__weights)
class BaseSearchlight(Measure): """Base class for searchlights. The idea for a searchlight algorithm stems from a paper by :ref:`Kriegeskorte et al. (2006) <KGB06>`. """ roi_sizes = ConditionalAttribute(enabled=False, doc="Number of features in each ROI.") roi_feature_ids = ConditionalAttribute( enabled=False, doc="Feature IDs for all generated ROIs.") roi_center_ids = ConditionalAttribute( enabled=True, doc="Center ID for all generated ROIs.") is_trained = True """Indicate that this measure is always trained.""" def __init__(self, queryengine, roi_ids=None, nproc=None, **kwargs): """ Parameters ---------- queryengine : QueryEngine Engine to use to discover the "neighborhood" of each feature. See :class:`~mvpa2.misc.neighborhood.QueryEngine`. roi_ids : None or list(int) or str List of query engine ids (e.g., feature ids, not coordinates, in case of `IndexQueryEngine`; and `node_indices` in case of `SurfaceQueryEngine`) that shall serve as ROI seeds (e.g., sphere centers). Alternatively, this can be the name of a feature attribute of the input dataset, whose non-zero values determine the feature ids (be careful to use it only with `IndexQueryEngine`). By default all query engine ids will be used. nproc : None or int How many processes to use for computation. Requires `pprocess` external module. If None -- all available cores will be used. **kwargs In addition this class supports all keyword arguments of its base-class :class:`~mvpa2.measures.base.Measure`. """ Measure.__init__(self, **kwargs) if nproc is not None and nproc > 1 and not externals.exists( 'pprocess'): raise RuntimeError("The 'pprocess' module is required for " "multiprocess searchlights. Please either " "install python-pprocess, or reduce `nproc` " "to 1 (got nproc=%i) or set to default None" % nproc) self._queryengine = queryengine if roi_ids is not None and not isinstance(roi_ids, str) \ and not len(roi_ids): raise ValueError, \ "Cannot run searchlight on an empty list of roi_ids" self.__roi_ids = roi_ids self.nproc = nproc def __repr__(self, prefixes=None): """String representation of a `Measure` Includes only arguments which differ from default ones """ if prefixes is None: prefixes = [] return super(BaseSearchlight, self).__repr__( prefixes=prefixes + _repr_attrs(self, ['queryengine', 'roi_ids', 'nproc'])) @due.dcite(Doi('10.1073/pnas.0600244103'), description="Searchlight analysis approach", tags=["implementation"]) @due.dcite( Doi('10.1038/nrn1931'), description= "Application of the searchlight approach to decoding using classifiers", tags=["use"]) def _call(self, dataset): """Perform the ROI search. """ # local binding nproc = self.nproc if nproc is None and externals.exists('pprocess'): import pprocess if on_osx: warning("Unable to determine automatically maximal number of " "cores on Mac OS X. Using 1") nproc = 1 else: try: nproc = pprocess.get_number_of_cores() or 1 except AttributeError: warning( "pprocess version %s has no API to figure out maximal " "number of cores. 
Using 1" % externals.versions['pprocess']) nproc = 1 # train the queryengine self._queryengine.train(dataset) # decide whether to run on all possible center coords or just a provided # subset if isinstance(self.__roi_ids, str): roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0] elif self.__roi_ids is not None: roi_ids = self.__roi_ids # safeguard against stupidity if __debug__: qe_ids = self._queryengine.ids # known to qe if not set(qe_ids).issuperset(roi_ids): raise IndexError( "Some roi_ids are not known to the query engine %s: %s" % (self._queryengine, set(roi_ids).difference(qe_ids))) else: roi_ids = self._queryengine.ids # pass to subclass results = self._sl_call(dataset, roi_ids, nproc) if 'mapper' in dataset.a: # since we know the space we can stick the original mapper into the # results as well if self.__roi_ids is None: results.a['mapper'] = copy.copy(dataset.a.mapper) else: # there is an additional selection step that needs to be # expressed by another mapper mapper = copy.copy(dataset.a.mapper) # NNO if the orignal mapper has no append (because it's not a # chainmapper, for example), we make our own chainmapper. # # THe original code was: # mapper.append(StaticFeatureSelection(roi_ids, # dshape=dataset.shape[1:])) feat_sel_mapper = StaticFeatureSelection( roi_ids, dshape=dataset.shape[1:]) if 'append' in dir(mapper): mapper.append(feat_sel_mapper) else: mapper = ChainMapper([dataset.a.mapper, feat_sel_mapper]) results.a['mapper'] = mapper # charge state self.ca.raw_results = results # return raw results, base-class will take care of transformations return results def _sl_call(self, dataset, roi_ids, nproc): """Classical generic searchlight implementation """ raise NotImplementedError("Must be implemented in the derived classes") queryengine = property(fget=lambda self: self._queryengine) roi_ids = property(fget=lambda self: self.__roi_ids)
class GroupClusterThreshold(Learner):
    """Statistical evaluation of group-level average accuracy maps

    This algorithm can be used to perform cluster-thresholding of
    searchlight-based group analyses. It implements a two-stage procedure
    that uses the results of within-subject permutation analyses, estimates
    a per-feature cluster-forming threshold (via bootstrap), and uses the
    thresholded bootstrap samples to estimate the distribution of cluster
    sizes in group-average accuracy maps under the NULL hypothesis, as
    described in [1]_.

    Note: this class implements a modified version of that algorithm. The
    present implementation differs in at least four aspects from the
    description in that paper.

    1) Cluster p-values refer to the probability of observing a particular
       cluster size or a larger one (original paper: probability to observe
       a larger cluster only). Consequently, probabilities reported by this
       implementation will have a tendency to be higher in comparison.

    2) Clusters found in the original (unpermuted) accuracy map are always
       included in the NULL distribution estimate of cluster sizes. This
       provides an explicit lower bound for probabilities, as there will
       always be at least one observed cluster for every cluster size found
       in the original accuracy map. Consequently, it is impossible to get a
       probability of zero for clusters of any size (see [2]_ for more
       information).

    3) Bootstrap accuracy maps that contain no clusters are counted in a
       dedicated size-zero bin in the NULL distribution of cluster sizes.
       This change yields reliable cluster-probabilities even for very low
       featurewise threshold probabilities, where some portion of the
       bootstrap accuracy maps does not contain any clusters.

    4) The method for FWE-correction used by the original authors is not
       provided. Instead, a range of alternatives implemented by the
       statsmodels package are available.

    Moreover, this implementation minimizes the required memory demands and
    allows for computing large numbers of bootstrap samples without a
    significant increase in memory demand (CPU time trade-off).

    Instances of this class must be trained before they can be used to
    threshold accuracy maps. The training dataset must match the following
    criteria:

    1) For every subject in the group, it must contain multiple accuracy
       maps that are the result of a within-subject classification analysis
       based on permuted class labels. One map must correspond to one fixed
       permutation for all features in the map, as described in [1]_. The
       original authors recommend 100 accuracy maps per subject for a
       typical searchlight analysis.

    2) It must contain a sample attribute indicating which sample is
       associated with which subject, because bootstrapping average accuracy
       maps is implemented by randomly drawing one map from each subject.
       The name of the attribute can be configured via the ``chunk_attr``
       parameter.

    After training, an instance can be called with a dataset to perform
    thresholding and statistical evaluation. Unless a single-sample dataset
    is passed, all samples in the input dataset will be averaged prior to
    thresholding.

    Returns
    -------
    Dataset
      This is a shallow copy of the input dataset (after a potential
      averaging), hence contains the same data and attributes. In addition
      it includes the following attributes:

      ``fa.featurewise_thresh``
        Vector with feature-wise cluster-forming thresholds.

      ``fa.clusters_featurewise_thresh``
        Vector with labels for clusters after thresholding the input data
        with the desired feature-wise probability.
        Each unique non-zero element corresponds to an individual
        super-threshold cluster. Cluster values are sorted by cluster size
        (number of features). The largest cluster is always labeled with
        ``1``.

      ``fa.clusters_fwe_thresh``
        Vector with labels for super-threshold clusters after correction for
        multiple comparisons. The attribute is derived from
        ``fa.clusters_featurewise_thresh`` by removing all clusters that do
        not pass the threshold when controlling for the family-wise error
        rate.

      ``a.clusterstats``
        Record array with information on all detected clusters. The array is
        sorted according to cluster size, starting with the largest cluster
        in terms of number of features. The array contains the fields
        ``size`` (number of features comprising the cluster), ``mean``,
        ``median``, ``min``, ``max``, ``std`` (respective descriptive
        statistics for all clusters), and ``prob_raw`` (probability of
        observing a cluster of this size or larger under the NULL
        hypothesis). If correction for multiple comparisons is enabled an
        additional field ``prob_corrected`` (probability after correction)
        is added.

      ``a.clusterlocations``
        Record array with information on the location of all detected
        clusters. The array is sorted according to cluster size (same order
        as ``a.clusterstats``). The array contains the fields ``max``
        (feature coordinate of the maximum score within the cluster) and
        ``center_of_mass`` (coordinate of the center of mass, weighted by
        the feature values within the cluster).

    References
    ----------
    .. [1] Johannes Stelzer, Yi Chen and Robert Turner (2013). Statistical
       inference and multiple testing correction in classification-based
       multi-voxel pattern analysis (MVPA): Random permutations and cluster
       size control. NeuroImage, 65, 69--82.
    .. [2] Smyth, G. K., & Phipson, B. (2010). Permutation P-values Should
       Never Be Zero: Calculating Exact P-values When Permutations Are
       Randomly Drawn. Statistical Applications in Genetics and Molecular
       Biology, 9, 1--12.
    """

    n_bootstrap = Parameter(
        100000,
        constraints=EnsureInt() & EnsureRange(min=1),
        doc="""Number of bootstrap samples to be generated from the training
            dataset. For each sample, an average map will be computed from a
            set of randomly drawn samples (one from each chunk). Bootstrap
            samples will be used to estimate a featurewise NULL distribution
            of accuracy values for initial thresholding, and to estimate the
            NULL distribution of cluster sizes under the NULL hypothesis. A
            larger number of bootstrap samples reduces the lower bound of
            probabilities, which may be beneficial for multiple comparison
            correction.""")

    feature_thresh_prob = Parameter(
        0.001,
        constraints=EnsureFloat() & EnsureRange(min=0.0, max=1.0),
        doc="""Feature-wise probability threshold. The value corresponding to
            this probability in the NULL distribution of accuracies will be
            used as the threshold for cluster forming. Given that the NULL
            distribution is estimated per feature, the actual threshold value
            will vary across features, yielding a threshold vector. The
            number of bootstrap samples needs to be adequate for the desired
            probability.
A ``ValueError`` is raised otherwise.""") chunk_attr = Parameter( 'chunks', doc="""Name of the attribute indicating the individual chunks from which a single sample each is drawn for averaging into a bootstrap sample.""") fwe_rate = Parameter( 0.05, constraints=EnsureFloat() & EnsureRange(min=0.0, max=1.0), doc="""Family-wise error rate for multiple comparison correction of cluster size probabilities.""") multicomp_correction = Parameter( 'fdr_bh', constraints=EnsureChoice('bonferroni', 'sidak', 'holm-sidak', 'holm', 'simes-hochberg', 'hommel', 'fdr_bh', 'fdr_by', None), doc="""Strategy for multiple comparison correction of cluster probabilities. All methods supported by statsmodels' ``multitest`` are available. In addition, ``None`` can be specified to disable correction.""") n_blocks = Parameter( 1, constraints=EnsureInt() & EnsureRange(min=1), doc="""Number of segments used to compute the feature-wise NULL distributions. This parameter determines the peak memory demand. In case of a single segment a matrix of size (n_bootstrap x nfeatures) will be allocated. Increasing the number of segments reduces the peak memory demand by that roughly factor. """) n_proc = Parameter( 1, constraints=EnsureInt() & EnsureRange(min=1), doc="""Number of parallel processes to use for computation. Requires `joblib` external module.""") def __init__(self, **kwargs): # force disable auto-train: would make no sense Learner.__init__(self, auto_train=False, **kwargs) if 1. / (self.params.n_bootstrap + 1) > self.params.feature_thresh_prob: raise ValueError('number of bootstrap samples is insufficient for' ' the desired threshold probability') self.untrain() def _untrain(self): self._thrmap = None self._null_cluster_sizes = None @due.dcite( Doi("10.1016/j.neuroimage.2012.09.063"), description="Statistical assessment of (searchlight) MVPA results", tags=['implementation']) def _train(self, ds): # shortcuts chunk_attr = self.params.chunk_attr # # Step 0: bootstrap maps by drawing one for each chunk and average them # (do N iterations) # this could take a lot of memory, hence instead of computing the maps # we compute the source maps they can be computed from and then (re)build # the matrix of bootstrapped maps either row-wise or column-wise (as # needed) to save memory by a factor of (close to) `n_bootstrap` # which samples belong to which chunk chunk_samples = dict([(c, np.where(ds.sa[chunk_attr].value == c)[0]) for c in ds.sa[chunk_attr].unique]) # pre-built the bootstrap combinations bcombos = [[random.sample(v, 1)[0] for v in list(chunk_samples.values())] for i in range(self.params.n_bootstrap)] bcombos = np.array(bcombos, dtype=int) # # Step 1: find the per-feature threshold that corresponds to some p # in the NULL segwidth = ds.nfeatures / self.params.n_blocks # speed things up by operating on an array not a dataset ds_samples = ds.samples if __debug__: debug('GCTHR', 'Compute per-feature thresholds in %i blocks of %i features' % (self.params.n_blocks, segwidth)) # Execution can be done in parallel as the estimation is independent # across features def featuresegment_producer(ncols): for segstart in range(0, ds.nfeatures, ncols): # one average map for every stored bcombo # this also slices the input data into feature subsets # for the compute blocks yield [np.mean( # get a view to a subset of the features # -- should be somewhat efficient as feature axis is # sliced ds_samples[sidx, segstart:segstart + ncols], axis=0) for sidx in bcombos] if self.params.n_proc == 1: # Serial execution thrmap = np.hstack( # 
merge across compute blocks [get_thresholding_map(d, self.params.feature_thresh_prob) # compute a partial threshold map for as many features # as fit into a compute block for d in featuresegment_producer(segwidth)]) else: # Parallel execution verbose_level_parallel = 50 \ if (__debug__ and 'GCTHR' in debug.active) else 0 # local import as only parallel execution needs this from joblib import Parallel, delayed # same code as above, just in parallel with joblib's Parallel thrmap = np.hstack( Parallel(n_jobs=self.params.n_proc, pre_dispatch=self.params.n_proc, verbose=verbose_level_parallel)( delayed(get_thresholding_map) (d, self.params.feature_thresh_prob) for d in featuresegment_producer(segwidth))) # store for later thresholding of input data self._thrmap = thrmap # # Step 2: threshold all NULL maps and build distribution of NULL cluster # sizes # cluster_sizes = Counter() # recompute the bootstrap average maps to threshold them and determine # cluster sizes dsa = dict(mapper=ds.a.mapper) if 'mapper' in ds.a else {} if __debug__: debug('GCTHR', 'Estimating NULL distribution of cluster sizes') # this step can be computed in parallel chunks to speeds things up if self.params.n_proc == 1: # Serial execution for sidx in bcombos: avgmap = np.mean(ds_samples[sidx], axis=0)[None] # apply threshold clustermap = avgmap > thrmap # wrap into a throw-away dataset to get the reverse mapping right bds = Dataset(clustermap, a=dsa) # this function reverse-maps every sample one-by-one, hence no need # to collect chunks of bootstrapped maps cluster_sizes = get_cluster_sizes(bds, cluster_sizes) else: # Parallel execution # same code as above, just restructured for joblib's Parallel for jobres in Parallel(n_jobs=self.params.n_proc, pre_dispatch=self.params.n_proc, verbose=verbose_level_parallel)( delayed(get_cluster_sizes) (Dataset(np.mean(ds_samples[sidx], axis=0)[None] > thrmap, a=dsa)) for sidx in bcombos): # aggregate cluster_sizes += jobres # store cluster size histogram for later p-value evaluation # use a sparse matrix for easy consumption (max dim is the number of # features, i.e. biggest possible cluster) scl = dok_matrix((1, ds.nfeatures + 1), dtype=int) for s in cluster_sizes: scl[0, s] = cluster_sizes[s] self._null_cluster_sizes = scl def _call(self, ds): if len(ds) > 1: # average all samples into one, assuming we got something like one # sample per subject as input avgr = mean_sample() ds = avgr(ds) # threshold input; at this point we only have one sample left thrd = ds.samples[0] > self._thrmap # mapper default mapper = IdentityMapper() # overwrite if possible if hasattr(ds, 'a') and 'mapper' in ds.a: mapper = ds.a.mapper # reverse-map input othrd = _verified_reverse1(mapper, thrd) # TODO: what is your purpose in life osamp? 
;-) osamp = _verified_reverse1(mapper, ds.samples[0]) # prep output dataset outds = ds.copy(deep=False) outds.fa['featurewise_thresh'] = self._thrmap # determine clusters labels, num = measurements.label(othrd) area = measurements.sum(othrd, labels, index=np.arange(1, num + 1)).astype(int) com = measurements.center_of_mass( osamp, labels=labels, index=np.arange(1, num + 1)) maxpos = measurements.maximum_position( osamp, labels=labels, index=np.arange(1, num + 1)) # for the rest we need the labels flattened labels = mapper.forward1(labels) # relabel clusters starting with the biggest and increase index with # decreasing size ordered_labels = np.zeros(labels.shape, dtype=int) ordered_area = np.zeros(area.shape, dtype=int) ordered_com = np.zeros((num, len(osamp.shape)), dtype=float) ordered_maxpos = np.zeros((num, len(osamp.shape)), dtype=float) for i, idx in enumerate(np.argsort(area)): ordered_labels[labels == idx + 1] = num - i # kinda ugly, but we are looping anyway ordered_area[i] = area[idx] ordered_com[i] = com[idx] ordered_maxpos[i] = maxpos[idx] labels = ordered_labels area = ordered_area[::-1] com = ordered_com[::-1] maxpos = ordered_maxpos[::-1] del ordered_labels # this one can be big # store cluster labels after forward-mapping outds.fa['clusters_featurewise_thresh'] = labels.copy() # location info outds.a['clusterlocations'] = \ np.rec.fromarrays( [com, maxpos], names=('center_of_mass', 'max')) # update cluster size histogram with the actual result to get a # proper lower bound for p-values # this will make a copy, because the original matrix is int cluster_probs_raw = _transform_to_pvals( area, self._null_cluster_sizes.astype('float')) clusterstats = ( [area, cluster_probs_raw], ['size', 'prob_raw'] ) # evaluate a bunch of stats for all clusters morestats = {} for cid in range(len(area)): # keep clusters on outer loop, because selection is more expensive clvals = ds.samples[0, labels == cid + 1] for id_, fx in ( ('mean', np.mean), ('median', np.median), ('min', np.min), ('max', np.max), ('std', np.std)): stats = morestats.get(id_, []) stats.append(fx(clvals)) morestats[id_] = stats for k, v in list(morestats.items()): clusterstats[0].append(v) clusterstats[1].append(k) if self.params.multicomp_correction is not None: # do a local import as only this tiny portion needs statsmodels import statsmodels.stats.multitest as smm rej, probs_corr = smm.multipletests( cluster_probs_raw, alpha=self.params.fwe_rate, method=self.params.multicomp_correction)[:2] # store corrected per-cluster probabilities clusterstats[0].append(probs_corr) clusterstats[1].append('prob_corrected') # remove cluster labels that did not pass the FWE threshold for i, r in enumerate(rej): if not r: labels[labels == i + 1] = 0 outds.fa['clusters_fwe_thresh'] = labels outds.a['clusterstats'] = \ np.rec.fromarrays(clusterstats[0], names=clusterstats[1]) return outds
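# ---------------------------------------------------------------------------
# Illustrative sketch (added commentary, not part of the original module).
# It walks through the two-stage workflow described in the class docstring:
# train on per-subject permutation (chance) accuracy maps, then evaluate the
# real group-average map.  The synthetic data below is an assumption made
# purely for demonstration; in a real analysis the samples would be
# searchlight accuracy maps.
def _group_cluster_threshold_sketch():
    import numpy as np
    from mvpa2.datasets import Dataset

    rng = np.random.RandomState(1)
    n_subjects, n_perms, n_features = 10, 20, 50

    # permutation maps: one chunk per subject, roughly chance-level accuracies
    perm = Dataset(rng.normal(0.5, 0.05, (n_subjects * n_perms, n_features)),
                   sa={'chunks': np.repeat(np.arange(n_subjects), n_perms)})

    # group-average "real" accuracy map with one above-chance blob
    real = np.full((1, n_features), 0.5) + rng.normal(0, 0.01, (1, n_features))
    real[0, 10:20] = 0.75
    real_map = Dataset(real)

    # feature_thresh_prob must not be smaller than 1 / (n_bootstrap + 1)
    clthr = GroupClusterThreshold(n_bootstrap=10000,
                                  feature_thresh_prob=0.001,
                                  chunk_attr='chunks',
                                  fwe_rate=0.05,
                                  multicomp_correction='fdr_bh')
    clthr.train(perm)
    res = clthr(real_map)

    # per-cluster statistics and the FWE-surviving cluster labels
    stats = res.a.clusterstats            # record array: size, prob_raw, ...
    surviving = res.fa.clusters_fwe_thresh
    return stats, surviving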
class Hyperalignment(ClassWithCollections): """Align the features across multiple datasets into a common feature space. This is a three-level algorithm. In the first level, a series of input datasets is projected into a common feature space using a configurable mapper. The common space is initially defined by a chosen exemplar from the list of input datasets, but is subsequently refined by iteratively combining the common space with the projected input datasets. In the second (optional) level, the original input datasets are again aligned with (or projected into) the intermediate first-level common space. Through a configurable number of iterations the common space is further refined by repeated projections of the input datasets and combination/aggregation of these projections into an updated common space. In the third level, the input datasets are again aligned with the, now final, common feature space. The output of this algorithm are trained mappers (one for each input dataset) that transform the individual features spaces into the common space. Level 1 and 2 are performed by the ``train()`` method, and level 3 is performed when the trained Hyperalignment instance is called with a list of datasets. This dataset list may or may not be identical to the training datasets. The default values for the parameters of the algorithm (e.g. projection via Procrustean transformation, common space aggregation by averaging) resemble the setup reported in :ref:`Haxby et al., Neuron (2011) <HGC+11>` *A common, high-dimensional model of the representational space in human ventral temporal cortex.* Examples -------- >>> # get some example data >>> from mvpa2.testing.datasets import datasets >>> from mvpa2.misc.data_generators import random_affine_transformation >>> ds4l = datasets['uni4large'] >>> # generate a number of distorted variants of this data >>> dss = [random_affine_transformation(ds4l) for i in xrange(4)] >>> ha = Hyperalignment() >>> ha.train(dss) >>> mappers = ha(dss) >>> len(mappers) 4 """ training_residual_errors = ConditionalAttribute(enabled=False, doc="""Residual error (norm of the difference between common space and projected data) per each training dataset at each level. The residuals are stored in a dataset with one row per level, and one column per input dataset. The first row corresponds to the error 1st-level of hyperalignment the remaining rows store the residual errors for each 2nd-level iteration.""") residual_errors = ConditionalAttribute(enabled=False, doc="""Residual error (norm of the difference between common space and projected data) per each dataset. The residuals are stored in a single-row dataset with one column per input dataset.""") # XXX Who cares whether it was chosen, or specified? This should be just # 'ref_ds' chosen_ref_ds = ConditionalAttribute(enabled=True, doc="""Index of the input dataset used as 1st-level reference dataset.""") # Lets use built-in facilities to specify parameters which # constructor should accept # the ``space`` of the mapper determines where the algorithm places the # common space definition in the datasets alignment = Parameter(ProcrusteanMapper(space='commonspace'), # might provide allowedtype # XXX Currently, there's no way to handle this with constraints doc="""The multidimensional transformation mapper. 
If `None` (default) an instance of :class:`~mvpa2.mappers.procrustean.ProcrusteanMapper` is used.""") alpha = Parameter(1, constraints=EnsureFloat() & EnsureRange(min=0, max=1), doc="""Regularization parameter to traverse between (Shrinkage)-CCA (canonical correlation analysis) and regular hyperalignment. Setting alpha to 1 makes the algorithm identical to hyperalignment and alpha of 0 makes it CCA. By default, it is 1, therefore hyperalignment. """) level2_niter = Parameter(1, constraints=EnsureInt() & EnsureRange(min=0), doc="Number of 2nd-level iterations.") ref_ds = Parameter(None, constraints=(EnsureRange(min=0) & EnsureInt() | EnsureNone()), doc="""Index of a dataset to use as 1st-level common space reference. If `None`, then the dataset with the maximum number of features is used.""") zscore_all = Parameter(False, constraints='bool', doc="""Flag to Z-score all datasets prior hyperalignment. Turn it off if Z-scoring is not desired or was already performed. If True, returned mappers are ChainMappers with the Z-scoring prepended to the actual projection.""") zscore_common = Parameter(True, constraints='bool', doc="""Flag to Z-score the common space after each adjustment. This should be left enabled in most cases.""") combiner1 = Parameter(lambda x,y: 0.5*(x+y), # doc="""How to update common space in the 1st-level loop. This must be a callable that takes two arguments. The first argument is one of the input datasets after projection onto the 1st-level common space. The second argument is the current 1st-level common space. The 1st-level combiner is called iteratively for each projected input dataset, except for the reference dataset. By default the new common space is the average of the current common space and the recently projected dataset.""") combiner2 = Parameter(lambda l: np.mean(l, axis=0), doc="""How to combine all individual spaces to common space. This must be a callable that take a sequence of datasets as an argument. The callable must return a single array. This combiner is called once with all datasets after 1st-level projection to create an updated common space, and is subsequently called again after each 2nd-level iteration.""") def __init__(self, **kwargs): ClassWithCollections.__init__(self, **kwargs) self.commonspace = None @due.dcite( Doi('10.1016/j.neuron.2011.08.026'), description="Hyperalignment of data to a common space", tags=["implementation"]) def train(self, datasets): """Derive a common feature space from a series of datasets. Parameters ---------- datasets : sequence of datasets Returns ------- A list of trained Mappers matching the number of input datasets. """ params = self.params # for quicker access ;) ca = self.ca ndatasets = len(datasets) nfeatures = [ds.nfeatures for ds in datasets] alpha = params.alpha residuals = None if ca['training_residual_errors'].enabled: residuals = np.zeros((1 + params.level2_niter, ndatasets)) ca.training_residual_errors = Dataset( samples = residuals, sa = {'levels' : ['1'] + ['2:%i' % i for i in xrange(params.level2_niter)]}) if __debug__: debug('HPAL', "Hyperalignment %s for %i datasets" % (self, ndatasets)) if params.ref_ds is None: ref_ds = np.argmax(nfeatures) else: ref_ds = params.ref_ds # Making sure that ref_ds is within range. #Parameter() already checks for it being a non-negative integer if ref_ds >= ndatasets: raise ValueError, "Requested reference dataset %i is out of " \ "bounds. 
We have only %i datasets provided" \ % (ref_ds, ndatasets) ca.chosen_ref_ds = ref_ds # zscore all data sets # ds = [ zscore(ds, chunks_attr=None) for ds in datasets] # TODO since we are doing in-place zscoring create deep copies # of the datasets with pruned targets and shallow copies of # the collections (if they would come needed in the transformation) # TODO: handle floats and non-floats differently to prevent # waste of memory if there is no need (e.g. no z-scoring) #otargets = [ds.sa.targets for ds in datasets] datasets = [ds.copy(deep=False) for ds in datasets] #datasets = [Dataset(ds.samples.astype(float), sa={'targets': [None] * len(ds)}) #datasets = [Dataset(ds.samples, sa={'targets': [None] * len(ds)}) # for ds in datasets] if params.zscore_all: if __debug__: debug('HPAL', "Z-scoring all datasets") for ids in xrange(len(datasets)): zmapper = ZScoreMapper(chunks_attr=None) zmapper.train(datasets[ids]) datasets[ids] = zmapper.forward(datasets[ids]) if alpha < 1: datasets, wmappers = self._regularize(datasets, alpha) # initial common space is the reference dataset commonspace = datasets[ref_ds].samples # the reference dataset might have been zscored already, don't do it # twice if params.zscore_common and not params.zscore_all: if __debug__: debug('HPAL_', "Creating copy of a commonspace and assuring " "it is of a floating type") commonspace = commonspace.astype(float) zscore(commonspace, chunks_attr=None) # create a mapper per dataset # might prefer some other way to initialize... later mappers = [deepcopy(params.alignment) for ds in datasets] # # Level 1 -- initial projection # lvl1_projdata = self._level1(datasets, commonspace, ref_ds, mappers, residuals) # # Level 2 -- might iterate multiple times # # this is the final common space self.commonspace = self._level2(datasets, lvl1_projdata, mappers, residuals) def __call__(self, datasets): """Derive a common feature space from a series of datasets. Parameters ---------- datasets : sequence of datasets Returns ------- A list of trained Mappers matching the number of input datasets. 
""" if self.commonspace is None: self.train(datasets) # place datasets into a copy of the list since items # will be reassigned datasets = list(datasets) params = self.params # for quicker access ;) alpha = params.alpha # for letting me be lazy ;) if params.zscore_all: if __debug__: debug('HPAL', "Z-scoring all datasets") # zscore them once while storing corresponding ZScoreMapper's # so we can assemble a comprehensive mapper at the end # (together with procrustes) zmappers = [] for ids in xrange(len(datasets)): zmapper = ZScoreMapper(chunks_attr=None) zmappers.append(zmapper) zmapper.train(datasets[ids]) datasets[ids] = zmapper.forward(datasets[ids]) if alpha < 1: datasets, wmappers = self._regularize(datasets, alpha) # # Level 3 -- final, from-scratch, alignment to final common space # mappers = self._level3(datasets) # return trained mappers for projection from all datasets into the # common space if params.zscore_all: # We need to construct new mappers which would chain # zscore and then final transformation if params.alpha < 1: return [ChainMapper([zm, wm, m]) for zm, wm, m in zip(zmappers, wmappers, mappers)] else: return [ChainMapper([zm, m]) for zm, m in zip(zmappers, mappers)] else: if params.alpha < 1: return [ChainMapper([wm, m]) for wm, m in zip(wmappers, mappers)] else: return mappers def _regularize(self, datasets, alpha): if __debug__: debug('HPAL', "Using regularized hyperalignment with alpha of %d" % alpha) wmappers = [] for ids in xrange(len(datasets)): U, S, Vh = np.linalg.svd(datasets[ids]) S = 1/np.sqrt( (1-alpha)*np.square(S) + alpha ) S.resize(len(Vh)) S = np.matrix(np.diag(S)) W = np.matrix(Vh.T)*S*np.matrix(Vh) wmapper = StaticProjectionMapper(proj=W, auto_train=False) wmapper.train(datasets[ids]) wmappers.append(wmapper) datasets[ids] = wmapper.forward(datasets[ids]) return datasets, wmappers def _level1(self, datasets, commonspace, ref_ds, mappers, residuals): params = self.params # for quicker access ;) data_mapped = [ds.samples for ds in datasets] for i, (m, ds_new) in enumerate(zip(mappers, datasets)): if __debug__: debug('HPAL_', "Level 1: ds #%i" % i) if i == ref_ds: continue # assign common space to ``space`` of the mapper, because this is # where it will be looking for it ds_new.sa[m.get_space()] = commonspace # find transformation of this dataset into the current common space m.train(ds_new) # remove common space attribute again to save on memory when the # common space is updated for the next iteration del ds_new.sa[m.get_space()] # project this dataset into the current common space ds_ = m.forward(ds_new.samples) if params.zscore_common: zscore(ds_, chunks_attr=None) # replace original dataset with mapped one -- only the reference # dataset will remain unchanged data_mapped[i] = ds_ # compute first-level residuals wrt to the initial common space if residuals is not None: residuals[0, i] = np.linalg.norm(ds_ - commonspace) # Update the common space. This is an incremental update after # processing each 1st-level dataset. 
Maybe there should be a flag # to make a batch update after processing all 1st-level datasets # to an identical 1st-level common space # TODO: make just a function so we dont' waste space commonspace = params.combiner1(ds_, commonspace) if params.zscore_common: zscore(commonspace, chunks_attr=None) return data_mapped def _level2(self, datasets, lvl1_data, mappers, residuals): params = self.params # for quicker access ;) data_mapped = lvl1_data # aggregate all processed 1st-level datasets into a new 2nd-level # common space commonspace = params.combiner2(data_mapped) # XXX Why is this commented out? Who knows what combiner2 is doing and # whether it changes the distribution of the data #if params.zscore_common: #zscore(commonspace, chunks_attr=None) ndatasets = len(datasets) for loop in xrange(params.level2_niter): # 2nd-level alignment starts from the original/unprojected datasets # again for i, (m, ds_new) in enumerate(zip(mappers, datasets)): if __debug__: debug('HPAL_', "Level 2 (%i-th iteration): ds #%i" % (loop, i)) # Optimization speed up heuristic # Slightly modify the common space towards other feature # spaces and reduce influence of this feature space for the # to-be-computed projection temp_commonspace = (commonspace * ndatasets - data_mapped[i]) \ / (ndatasets - 1) if params.zscore_common: zscore(temp_commonspace, chunks_attr=None) # assign current common space ds_new.sa[m.get_space()] = temp_commonspace # retrain the mapper for this dataset m.train(ds_new) # remove common space attribute again to save on memory when the # common space is updated for the next iteration del ds_new.sa[m.get_space()] # obtain the 2nd-level projection ds_ = m.forward(ds_new.samples) if params.zscore_common: zscore(ds_, chunks_attr=None) # store for 2nd-level combiner data_mapped[i] = ds_ # compute residuals if residuals is not None: residuals[1+loop, i] = np.linalg.norm(ds_ - commonspace) commonspace = params.combiner2(data_mapped) # and again if params.zscore_common: zscore(commonspace, chunks_attr=None) # return the final common space return commonspace def _level3(self, datasets): params = self.params # for quicker access ;) # create a mapper per dataset mappers = [deepcopy(params.alignment) for ds in datasets] # key different from level-2; the common space is uniform #temp_commonspace = commonspace residuals = None if self.ca['residual_errors'].enabled: residuals = np.zeros((1, len(datasets))) self.ca.residual_errors = Dataset(samples=residuals) # start from original input datasets again for i, (m, ds_new) in enumerate(zip(mappers, datasets)): if __debug__: debug('HPAL_', "Level 3: ds #%i" % i) # retrain mapper on final common space ds_new.sa[m.get_space()] = self.commonspace m.train(ds_new) # remove common space attribute again to save on memory del ds_new.sa[m.get_space()] if residuals is not None: # obtain final projection data_mapped = m.forward(ds_new.samples) residuals[0, i] = np.linalg.norm(data_mapped - self.commonspace) return mappers
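
# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the PyMVPA API): the function below
# re-implements the two training levels described in the Hyperalignment
# docstring on plain NumPy arrays, using an orthogonal Procrustes solution in
# place of the configurable ``alignment`` mapper. It omits z-scoring, residual
# tracking and the leave-one-out speed-up heuristic of ``_level2``, and
# assumes all datasets have identical shapes. The function name and its
# arguments are hypothetical and exist only for illustration.
def _hyperalignment_levels_sketch(datasets, ref_ds=0, level2_niter=1):
    """Return (mappers, commonspace) for a list of (nsamples x nfeatures) arrays.

    Each returned mapper is an orthogonal (nfeatures x nfeatures) matrix that
    projects the corresponding input array into the common space.
    """
    import numpy as np

    def procrustes(source, target):
        # orthogonal R minimizing ||source.dot(R) - target|| (Frobenius norm)
        U, _, Vt = np.linalg.svd(source.T.dot(target))
        return U.dot(Vt)

    nfeatures = datasets[0].shape[1]
    mappers = [np.eye(nfeatures) for _ in datasets]
    projected = [ds.astype(float) for ds in datasets]

    # Level 1: seed the common space with the reference dataset and fold the
    # remaining datasets in one at a time (cf. the default ``combiner1``)
    commonspace = datasets[ref_ds].astype(float)
    for i, ds in enumerate(datasets):
        if i == ref_ds:
            continue
        ds = ds.astype(float)
        mappers[i] = procrustes(ds, commonspace)
        projected[i] = ds.dot(mappers[i])
        commonspace = 0.5 * (projected[i] + commonspace)

    # Level 2: repeatedly re-derive every mapping against the average of all
    # projected datasets (cf. the default ``combiner2``)
    for _ in range(level2_niter):
        commonspace = np.mean(projected, axis=0)
        for i, ds in enumerate(datasets):
            ds = ds.astype(float)
            mappers[i] = procrustes(ds, commonspace)
            projected[i] = ds.dot(mappers[i])

    # the final common space; aligning further datasets against it corresponds
    # to level 3 (what calling a trained Hyperalignment instance does above)
    commonspace = np.mean(projected, axis=0)
    return mappers, commonspace

# For example, with ``dss = [np.random.randn(20, 10) for _ in range(3)]``,
# ``_hyperalignment_levels_sketch(dss)`` returns three orthogonal 10x10
# matrices and a 20x10 common space.
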
class SearchlightHyperalignment(ClassWithCollections): """ Given a list of datasets, provide a list of mappers into common space using searchlight based hyperalignment. :ref:`Guntupalli et al., Cerebral Cortex (2016)` 1) Input datasets should all be of the same size in terms of nsamples and nfeatures, and be coarsely aligned (using anatomy). 2) All features in all datasets should be zscored. 3) Datasets should have feature attribute `voxel_indices` containing spatial coordinates of all features """ # TODO: add {training_,}residual_errors .ca ? ## Parameters common with Hyperalignment but overriden ref_ds = Parameter(0, constraints=EnsureInt() & EnsureRange(min=0), doc="""Index of a dataset to use as a reference. First dataset is used as default. If you supply exclude_from_model list, you should supply the ref_ds index as index before you remove those excluded datasets. Note that unlike regular Hyperalignment, there is no automagic choosing of the "best" ref_ds by default.""") ## Parameters specific to SearchlightHyperalignment queryengine = Parameter( None, doc="""A single (or a list of query engines, one per each dataset) to be used. If not provided, volumetric searchlight, with spherical neighborhood as instructed by radius parameter will be used.""") radius = Parameter( 3, constraints=EnsureInt() & EnsureRange(min=1), doc="""Radius of a searchlight sphere in number of voxels to be used if no `queryengine` argument was provided.""") nproc = Parameter( 1, constraints=EnsureInt() & EnsureRange(min=1) | EnsureNone(), doc="""Number of cores to use.""") nblocks = Parameter( None, constraints=EnsureInt() & EnsureRange(min=1) | EnsureNone(), doc="""Number of blocks to divide to process. Higher number results in smaller memory consumption.""") sparse_radius = Parameter( None, constraints=(EnsureRange(min=1) & EnsureInt() | EnsureNone()), doc="""Radius supplied to scatter_neighborhoods in units of voxels. This is effectively the distance between the centers where hyperalignment is performed in searchlights. ATM applicable only if no custom queryengine was provided. If None, hyperalignment is performed at every voxel (default).""") hyperalignment = Parameter( Hyperalignment(ref_ds=None), doc="""Hyperalignment instance to be used in each searchlight sphere. Default is just the Hyperalignment instance with default parameters. Its `ref_ds` parameter would be overridden by the `ref_ds` parameter of this SearchlightHyperalignment instance because we want to be consistent and only need one `ref_ds`.""") combine_neighbormappers = Parameter( True, constraints=EnsureBool(), doc="""This param determines whether to combine mappers for each voxel from its neighborhood searchlights or just use the mapper for which it is the center voxel. This will not be applicable for certain queryengines whose ids and neighborhoods are from different spaces, such as for SurfaceVerticesQueryEngine""") compute_recon = Parameter( True, constraints=EnsureBool(), doc="""This param determines whether to compute reverse mappers for each subject from common-space to subject space. These will be stored in the StaticProjectionMapper() and used when reverse() is called. Enabling it will double the size of the mappers returned.""") featsel = Parameter( 1.0, constraints=EnsureFloat() & EnsureRange(min=0.0, max=1.0) | EnsureInt() & EnsureRange(min=2), doc="""Determines if feature selection will be performed in each searchlight. 1.0: Use all features. 
< 1.0 is understood as selecting that proportion of features in each searchlight of ref_ds using feature scores; > 1.0 is understood as selecting at most that many features in each searchlight.""") # TODO: Should we get rid of this feature? use_same_features = Parameter( False, constraints=EnsureBool(), doc="""Select the same (best) features when doing feature selection for all datasets.""") exclude_from_model = Parameter( [], constraints=EnsureListOf(int), doc="""List of dataset indices that will not participate in building common model. These will still get mappers back but they don't influence the model or voxel selection.""") mask_node_ids = Parameter( None, constraints=EnsureListOf(int) | EnsureNone(), doc="""You can specify a mask to compute searchlight hyperalignment only within this mask. These would be a list of voxel indices.""") dtype = Parameter( 'float32', constraints='str', doc="""dtype of elements transformation matrices to save on memory for big datasets""") results_backend = Parameter( 'hdf5', constraints=EnsureChoice('hdf5', 'native'), doc="""'hdf5' or 'native'. See Searchlight documentation.""") tmp_prefix = Parameter( 'tmpsl', constraints='str', doc="""Prefix for temporary files. See Searchlight documentation.""") def __init__(self, **kwargs): _shpaldebug("Initializing.") ClassWithCollections.__init__(self, **kwargs) self.ndatasets = 0 self.nfeatures = 0 self.projections = None # This option makes the roi_seed in each SL to be selected during feature selection self.force_roi_seed = True if self.params.nproc is not None and self.params.nproc > 1 \ and not externals.exists('pprocess'): raise RuntimeError("The 'pprocess' module is required for " "multiprocess searchlights. Please either " "install python-pprocess, or reduce `nproc` " "to 1 (got nproc=%i) or set to default None" % self.params.nproc) if not externals.exists('scipy'): raise RuntimeError("The 'scipy' module is required for " "searchlight hyperalignment.") if self.params.results_backend == 'native': raise NotImplementedError("'native' mode to handle results is still a " "work in progress.") #warning("results_backend is set to 'native'. This has been known" # "to result in longer run time when working with big datasets.") if self.params.results_backend == 'hdf5' and \ not externals.exists('h5py'): raise RuntimeError("The 'hdf5' module is required for " "when results_backend is set to 'hdf5'") def _proc_block(self, block, datasets, featselhyper, queryengines, seed=None, iblock='main'): if seed is not None: mvpa2.seed(seed) if __debug__: debug('SLC', 'Starting computing block for %i elements' % len(block)) bar = ProgressBar() projections = [csc_matrix((self.nfeatures, self.nfeatures), dtype=self.params.dtype) for isub in range(self.ndatasets)] for i, node_id in enumerate(block): # retrieve the feature ids of all features in the ROI from the query # engine # Find the neighborhood for that selected nearest node roi_feature_ids_all = [qe[node_id] for qe in queryengines] # handling queryengines that return AttrDatasets for isub in range(len(roi_feature_ids_all)): if is_datasetlike(roi_feature_ids_all[isub]): # making sure queryengine returned proper shaped output assert(roi_feature_ids_all[isub].nsamples == 1) roi_feature_ids_all[isub] = roi_feature_ids_all[isub].samples[0, :].tolist() if len(roi_feature_ids_all) == 1: # just one was provided to be "broadcasted" roi_feature_ids_all *= len(datasets) # if qe returns zero-sized ROI for any subject, pass... 
            if any(len(x) == 0 for x in roi_feature_ids_all):
                continue
            # selecting neighborhood for all subjects for hyperalignment
            ds_temp = [sd[:, ids]
                       for sd, ids in zip(datasets, roi_feature_ids_all)]
            if self.force_roi_seed:
                roi_seed = np.array(
                    roi_feature_ids_all[self.params.ref_ds]) == node_id
                ds_temp[self.params.ref_ds].fa['roi_seed'] = roi_seed
            if __debug__:
                msg = 'ROI (%i/%i), %i features' % (
                    i + 1, len(block), ds_temp[self.params.ref_ds].nfeatures)
                debug('SLC', bar(float(i + 1) / len(block), msg), cr=True)
            hmappers = featselhyper(ds_temp)
            assert(len(hmappers) == len(datasets))
            roi_feature_ids_ref_ds = roi_feature_ids_all[self.params.ref_ds]
            for isub, roi_feature_ids in enumerate(roi_feature_ids_all):
                if not self.params.combine_neighbormappers:
                    I = roi_feature_ids
                    #J = [roi_feature_ids[node_id]] * len(roi_feature_ids)
                    J = [node_id] * len(roi_feature_ids)
                    V = hmappers[isub].tolist()
                    if np.isscalar(V):
                        V = [V]
                else:
                    I, J, V = [], [], []
                    for f2, roi_feature_id_ref_ds in enumerate(roi_feature_ids_ref_ds):
                        I += roi_feature_ids
                        J += [roi_feature_id_ref_ds] * len(roi_feature_ids)
                        V += hmappers[isub][:, f2].tolist()
                proj = coo_matrix(
                    (V, (I, J)),
                    shape=(max(self.nfeatures, max(I) + 1),
                           max(self.nfeatures, max(J) + 1)),
                    dtype=self.params.dtype)
                proj = proj.tocsc()
                # Cleaning up the current subject's projections to free up memory
                hmappers[isub] = [[] for _ in hmappers]
                # accumulate the contribution of this searchlight into the
                # subject's whole-brain sparse projection (a simplified sketch
                # of this bookkeeping follows the class definition below)
                projections[isub] = projections[isub] + proj

        if self.params.results_backend == 'native':
            return projections
        elif self.params.results_backend == 'hdf5':
            # store results in a temporary file and return a filename
            results_file = mktemp(prefix=self.params.tmp_prefix,
                                  suffix='-%s.hdf5' % iblock)
            if __debug__:
                debug('SLC', "Storing results into %s" % results_file)
            h5save(results_file, projections)
            if __debug__:
                debug('SLC_', "Results stored")
            return results_file
        else:
            raise RuntimeError("Must not reach this point")

    def __handle_results(self, results):
        if self.params.results_backend == 'hdf5':
            # 'results' must be just a filename
            assert(isinstance(results, str))
            if __debug__:
                debug('SLC', "Loading results from %s" % results)
            results_data = h5load(results)
            os.unlink(results)
            if __debug__:
                debug('SLC_', "Loaded results of len=%d from %s"
                      % (len(results_data), results))
            for isub, res in enumerate(results_data):
                self.projections[isub] = self.projections[isub] + res
            if __debug__:
                debug('SLC_', "Finished adding results")
            return

    def __handle_all_results(self, results):
        """Helper generator to decorate passing the results out to results_fx
        """
        for r in results:
            yield self.__handle_results(r)

    @due.dcite(
        Doi('10.1093/cercor/bhw068'),
        description="Full cortex hyperalignment of data to a common space",
        tags=["implementation"])
    def __call__(self, datasets):
        """Estimate mappers for each dataset using searchlight-based
        hyperalignment.

        Parameters
        ----------
        datasets : list or tuple of datasets

        Returns
        -------
        A list of trained StaticProjectionMappers of the same length as
        datasets
        """
        # Perform some checks first before modifying internal state
        params = self.params
        ndatasets = len(datasets)
        if len(datasets) <= 1:
            raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                             "operate on. Got: %d" % ndatasets)
        if params.ref_ds in params.exclude_from_model:
            raise ValueError("Requested reference dataset %i is also "
                             "in the exclude list." % params.ref_ds)
        if params.ref_ds >= ndatasets:
            raise ValueError("Requested reference dataset %i is out of "
                             "bounds. We have only %i datasets provided"
                             % (params.ref_ds, ndatasets))
        # The rest of the checks are just warnings
        self.ndatasets = ndatasets
        _shpaldebug("SearchlightHyperalignment %s for %i datasets"
                    % (self, self.ndatasets))
        selected = [_ for _ in range(ndatasets)
                    if _ not in params.exclude_from_model]
        ref_ds_train = selected.index(params.ref_ds)
        params.hyperalignment.params.ref_ds = ref_ds_train
        warning('Using dataset #%d as the reference dataset (#%d after '
                'excluding datasets)' % (params.ref_ds, ref_ds_train))
        if len(params.exclude_from_model) > 0:
            warning("These datasets will not participate in building the "
                    "common model: %s" % params.exclude_from_model)
        if __debug__:
            # verify that datasets were zscored prior to the alignment, since
            # it is an assumed/required preprocessing step
            for ids, ds in enumerate(datasets):
                for f, fname, tval in ((np.mean, 'means', 0),
                                       (np.std, 'stds', 1)):
                    vals = f(ds, axis=0)
                    vals_comp = np.abs(vals - tval) > 1e-5
                    if np.any(vals_comp):
                        warning('%d %s are too different (max diff=%g) from %d in '
                                'dataset %d to come from a zscored dataset. '
                                'Please zscore datasets first for correct operation '
                                '(unless it was intentional)'
                                % (np.sum(vals_comp), fname,
                                   np.max(np.abs(vals - tval)), tval, ids))

        # Setting up SearchlightHyperalignment
        # we need to know which original features were comprising the
        # individual SL ROIs
        _shpaldebug('Initializing FeatureSelectionHyperalignment.')
        hmeasure = FeatureSelectionHyperalignment(
            ref_ds=params.ref_ds,
            featsel=params.featsel,
            hyperalignment=params.hyperalignment,
            full_matrix=params.combine_neighbormappers,
            use_same_features=params.use_same_features,
            exclude_from_model=params.exclude_from_model,
            dtype=params.dtype)

        # Performing SL processing manually
        _shpaldebug("Setting up for searchlights")
        if params.nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                params.nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1"
                        % externals.versions['pprocess'])
                params.nproc = 1

        # XXX I think this class should already accept a single dataset only.
        # It should have a ``space`` setting that names a sample attribute that
        # can be used to identify individual/original datasets.
        # Taking a single dataset as argument would be cleaner, because the
        # algorithm relies on the assumption that there is a coarse feature
        # alignment, i.e. the SL ROIs cover roughly the same area
        queryengines = self._get_trained_queryengines(
            datasets, params.queryengine, params.radius, params.ref_ds)
        # For surface nodes to voxels queryengines, roi_seed hardly makes sense
        qe = queryengines[(0 if len(queryengines) == 1 else params.ref_ds)]
        if isinstance(qe, SurfaceVerticesQueryEngine):
            self.force_roi_seed = False
            if not self.params.combine_neighbormappers:
                raise NotImplementedError("Mapping from voxels to surface nodes is not " "implemented yet.
Try setting combine_neighbormappers to True.") self.nfeatures = datasets[params.ref_ds].nfeatures _shpaldebug("Performing Hyperalignment in searchlights") # Setting up centers for running SL Hyperalignment if params.sparse_radius is None: roi_ids = self._get_verified_ids(queryengines) \ if params.mask_node_ids is None \ else params.mask_node_ids else: if params.queryengine is not None: raise NotImplementedError( "using sparse_radius whenever custom queryengine is " "provided is not yet supported.") _shpaldebug("Setting up sparse neighborhood") from mvpa2.misc.neighborhood import scatter_neighborhoods if params.mask_node_ids is None: scoords, sidx = scatter_neighborhoods( Sphere(params.sparse_radius), datasets[params.ref_ds].fa.voxel_indices, deterministic=True) roi_ids = sidx else: scoords, sidx = scatter_neighborhoods( Sphere(params.sparse_radius), datasets[params.ref_ds].fa.voxel_indices[params.mask_node_ids], deterministic=True) roi_ids = [params.mask_node_ids[sid] for sid in sidx] # Initialize projections _shpaldebug('Initializing projection matrices') self.projections = [ csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype) for isub in range(self.ndatasets)] # compute if params.nproc is not None and params.nproc > 1: # split all target ROIs centers into `nproc` equally sized blocks nproc_needed = min(len(roi_ids), params.nproc) params.nblocks = nproc_needed \ if params.nblocks is None else params.nblocks params.nblocks = min(len(roi_ids), params.nblocks) node_blocks = np.array_split(roi_ids, params.nblocks) # the next block sets up the infrastructure for parallel computing # this can easily be changed into a ParallelPython loop, if we # decide to have a PP job server in PyMVPA import pprocess p_results = pprocess.Map(limit=nproc_needed) if __debug__: debug('SLC', "Starting off %s child processes for nblocks=%i" % (nproc_needed, params.nblocks)) compute = p_results.manage( pprocess.MakeParallel(self._proc_block)) seed = mvpa2.get_random_seed() for iblock, block in enumerate(node_blocks): # should we maybe deepcopy the measure to have a unique and # independent one per process? compute(block, datasets, copy.copy(hmeasure), queryengines, seed=seed, iblock=iblock) else: # otherwise collect the results in an 1-item list _shpaldebug('Using 1 process to compute mappers.') if params.nblocks is None: params.nblocks = 1 params.nblocks = min(len(roi_ids), params.nblocks) node_blocks = np.array_split(roi_ids, params.nblocks) p_results = [self._proc_block(block, datasets, hmeasure, queryengines) for block in node_blocks] results_ds = self.__handle_all_results(p_results) # Dummy iterator for, you know, iteration list(results_ds) _shpaldebug('Wrapping projection matrices into StaticProjectionMappers') self.projections = [ StaticProjectionMapper(proj=proj, recon=proj.T) if params.compute_recon else StaticProjectionMapper(proj=proj) for proj in self.projections] return self.projections def _get_verified_ids(self, queryengines): """Helper to return ids of queryengines, verifying that they are the same""" qe0 = queryengines[0] roi_ids = qe0.ids for qe in queryengines: if qe is not qe0: # if a different query engine (so wasn't just replicated) if np.any(qe.ids != qe0.ids): raise RuntimeError( "Query engine %s provided different ids than %s. 
                        "Not supported" % (qe, qe0))
        return roi_ids

    def _get_trained_queryengines(self, datasets, queryengine, radius, ref_ds):
        """Helper to return trained query engine(s): either a list with a
        single engine, or one engine per dataset.

        If `queryengine` is None, an IndexQueryEngine with a spherical
        neighborhood of the given `radius` is created and trained on the
        reference dataset.
        """
        ndatasets = len(datasets)
        if queryengine:
            if isinstance(queryengine, (list, tuple)):
                queryengines = queryengine
                if len(queryengines) != ndatasets:
                    raise ValueError(
                        "%d query engines were specified although %d datasets "
                        "were provided" % (len(queryengines), ndatasets))
                _shpaldebug("Training provided query engines")
                for qe, ds in zip(queryengines, datasets):
                    qe.train(ds)
            else:
                queryengine.train(datasets[ref_ds])
                queryengines = [queryengine]
        else:
            _shpaldebug('No custom query engines were provided. Setting up the '
                        'volumetric query engine on voxel_indices.')
            queryengine = IndexQueryEngine(voxel_indices=Sphere(radius))
            queryengine.train(datasets[ref_ds])
            queryengines = [queryengine]
        return queryengines
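
# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the PyMVPA API) of the bookkeeping done in
# ``SearchlightHyperalignment._proc_block`` above: the per-searchlight mappers
# of one subject are scattered into a sparse (nfeatures x nfeatures) matrix at
# the ROI's feature indices, and contributions from overlapping searchlights
# are summed. For simplicity this assumes square local mappers whose rows and
# columns refer to the same feature ids (the real code handles differing
# subject/reference ids and the ``combine_neighbormappers`` switch). The
# function name and its arguments (``local_mappers``, ``roi_ids``) are
# hypothetical.
def _accumulate_searchlight_mappers_sketch(local_mappers, roi_ids, nfeatures,
                                           dtype='float32'):
    """Sum dense per-ROI mappers into one sparse projection matrix."""
    import numpy as np
    from scipy.sparse import coo_matrix, csc_matrix

    projection = csc_matrix((nfeatures, nfeatures), dtype=dtype)
    for mapper, ids in zip(local_mappers, roi_ids):
        ids = np.asarray(ids)
        n = len(ids)
        # element (r, c) of the local mapper maps feature ids[r] of the
        # subject onto feature ids[c] of the common/reference space
        I = np.repeat(ids, n)                      # row index per element
        J = np.tile(ids, n)                        # column index per element
        V = np.asarray(mapper, dtype=dtype).ravel()
        projection = projection + coo_matrix(
            (V, (I, J)), shape=(nfeatures, nfeatures), dtype=dtype).tocsc()
    # as in ``__call__`` above, such a matrix would finally be wrapped into a
    # StaticProjectionMapper (optionally with ``recon=projection.T``)
    return projection
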
# A little debug helper to avoid constant if __debug__ conditioning,
# but it also means that debugging cannot be activated at run time
# after the import of this module
def _shpaldebug(*args):
    pass

if __debug__:
    from mvpa2.base import debug
    if 'SHPAL' in debug.active:
        def _shpaldebug(msg):
            debug('SHPAL', "%s" % msg)


@due.dcite(
    Doi('10.1016/j.neuron.2011.08.026'),
    description="Per-feature measure of maximal correlation to features in other datasets",
    tags=["implementation"])
def compute_feature_scores(datasets, exclude_from_model=None):
    """Take a list of datasets and compute a feature score for each feature
    in each dataset, quantifying how strongly it correlates with features in
    the other datasets (:ref:`Haxby et al., Neuron (2011) <HGC+11>`).

    Parameters
    ----------
    datasets : list or tuple of datasets
    exclude_from_model : list of dataset indices that won't participate in
        voxel selection of others