def ridge(stim, resp, alpha, singcutoff=1e-10, normalpha=False, logger=ridge_logger):
    """Uses ridge regression to find a linear transformation of [stim] that approximates
    [resp]. The regularization parameter is [alpha].

    Parameters
    ----------
    stim : array_like, shape (T, N)
        Stimuli with T time points and N features.
    resp : array_like, shape (T, M)
        Responses with T time points and M separate responses.
    alpha : float or array_like, shape (M,)
        Regularization parameter. Can be given as a single value (which is applied to
        all M responses) or separate values for each response.
    normalpha : boolean
        Whether ridge parameters should be normalized by the largest singular value of
        stim. Good for comparing models with different numbers of parameters.

    Returns
    -------
    wt : array_like, shape (N, M)
        Linear regression weights.
    """
    try:
        U, S, Vh = np.linalg.svd(stim, full_matrices=False)
    except np.linalg.LinAlgError:
        logger.info("NORMAL SVD FAILED, trying more robust dgesvd..")
        U, S, Vh = dgesvd(stim, full_matrices=False, algo='svd')

    UR = np.dot(U.T, np.nan_to_num(resp))

    # Expand alpha to a collection if it's just a single value
    if isinstance(alpha, float):
        alpha = np.ones(resp.shape[1]) * alpha

    # Normalize alpha by the LSV norm
    norm = S[0]
    if normalpha:
        nalphas = alpha * norm
    else:
        nalphas = alpha

    # Compute weights for each alpha
    ualphas = np.unique(nalphas)
    wt = np.zeros((stim.shape[1], resp.shape[1]))
    for ua in ualphas:
        selvox = np.nonzero(nalphas == ua)[0]
        awt = reduce(np.dot, [Vh.T, np.diag(S / (S**2 + ua**2)), UR[:, selvox]])
        wt[:, selvox] = awt

    return wt
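
# Minimal usage sketch for ridge() above, illustrative only. It assumes ridge()
# and its module-level dependencies (numpy as np, functools.reduce, ridge_logger,
# and the dgesvd fallback) are importable; the synthetic data sizes are arbitrary.
import numpy as np

T, N, M = 200, 50, 10                         # time points, features, responses
rng = np.random.RandomState(0)
stim = rng.randn(T, N)                        # design matrix (ideally z-scored per feature)
resp = np.dot(stim, rng.randn(N, M)) + rng.randn(T, M)

wt = ridge(stim, resp, alpha=10.0)            # one alpha shared by all M responses
wt_per_resp = ridge(stim, resp, alpha=np.logspace(0, 2, M))  # one alpha per response
pred = np.dot(stim, wt)                       # in-sample predictions, shape (T, M)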
def _train(self, source):
    params = self.params
    # Since it is unsupervised, we don't care about labels
    datas = ()
    odatas = ()
    means = ()
    shapes = ()

    assess_residuals = __debug__ and 'MAP_' in debug.active

    target = source.sa[self.get_space()].value

    for i, ds in enumerate((source, target)):
        if is_datasetlike(ds):
            data = np.asarray(ds.samples)
        else:
            data = ds
        if assess_residuals:
            odatas += (data,)
        if self._demean:
            if i == 0:
                mean = self._offset_in
            else:
                mean = data.mean(axis=0)
            data = data - mean
        else:
            # no demeaning === zero means
            mean = np.zeros(shape=data.shape[1:])
        means += (mean,)
        datas += (data,)
        shapes += (data.shape,)

    # shortcuts for sizes
    sn, sm = shapes[0]
    tn, tm = shapes[1]

    # Check the sizes
    if sn != tn:
        raise ValueError, "Data for both spaces should have the same " \
              "number of samples. Got %d in source and %d in target space" \
              % (sn, tn)

    # Sums of squares
    ssqs = [np.sum(d**2, axis=0) for d in datas]

    # XXX check for being invariant?
    #     needs to be tuned up properly and not raise but handle
    for i in xrange(2):
        if np.all(ssqs[i] <= np.abs((np.finfo(datas[i].dtype).eps
                                     * sn * means[i])**2)):
            raise ValueError, "For now do not handle invariant in time datasets"

    norms = [ np.sqrt(np.sum(ssq)) for ssq in ssqs ]
    normed = [ data/norm for (data, norm) in zip(datas, norms) ]

    # add new blank dimensions to source space if needed
    if sm < tm:
        normed[0] = np.hstack( (normed[0], np.zeros((sn, tm-sm))) )

    if sm > tm:
        if params.reduction:
            normed[1] = np.hstack( (normed[1], np.zeros((sn, sm-tm))) )
        else:
            raise ValueError, "reduction=False, so mapping from " \
                  "higher dimensionality " \
                  "source space is not supported. Source space had %d " \
                  "while target %d dimensions (features)" % (sm, tm)

    source, target = normed
    if params.oblique:
        # Just do silly linear system of equations ;) or naive
        # inverse problem
        if sn == sm and tm == 1:
            T = np.linalg.solve(source, target)
        else:
            T = np.linalg.lstsq(source, target,
                                rcond=params.oblique_rcond)[0]
        ss = 1.0
    else:
        # Orthogonal transformation
        # figure out optimal rotation
        if params.svd == 'numpy':
            U, s, Vh = np.linalg.svd(np.dot(target.T, source),
                                     full_matrices=False)
        elif params.svd == 'scipy':
            # would raise exception if not present
            externals.exists('scipy', raise_=True)
            import scipy
            U, s, Vh = scipy.linalg.svd(np.dot(target.T, source),
                                        full_matrices=False)
        elif params.svd == 'dgesvd':
            from mvpa2.support.lapack_svd import svd as dgesvd
            U, s, Vh = dgesvd(np.dot(target.T, source),
                              full_matrices=True, algo='svd')
        else:
            raise ValueError('Unknown type of svd %r'%(params.svd))
        T = np.dot(Vh.T, U.T)

        if not params.reflection:
            # then we need to assure that it is only rotation
            # "recipe" from
            # http://en.wikipedia.org/wiki/Orthogonal_Procrustes_problem
            # for more and info and original references, see
            # http://dx.doi.org/10.1007%2FBF02289451
            nsv = len(s)
            s[:-1] = 1
            s[-1] = np.linalg.det(T)
            T = np.dot(U[:, :nsv] * s, Vh)

        # figure out scale and final translation
        # XXX with reflection False -- not sure if here or there or anywhere...
        ss = sum(s)

    # if we were to collect standardized distance
    # std_d = 1 - sD**2

    # select out only relevant dimensions
    if sm != tm:
        T = T[:sm, :tm]

    self._scale = scale = ss * norms[1] / norms[0]

    # Assign projection
    if self.params.scaling:
        proj = scale * T
    else:
        proj = T
    self._proj = proj

    if self._demean:
        self._offset_out = means[1]

    if __debug__ and 'MAP_' in debug.active:
        # compute the residuals
        res_f = self.forward(odatas[0])
        d_f = np.linalg.norm(odatas[1] - res_f)/np.linalg.norm(odatas[1])
        res_r = self.reverse(odatas[1])
        d_r = np.linalg.norm(odatas[0] - res_r)/np.linalg.norm(odatas[0])
        debug('MAP_', "%s, residuals are forward: %g,"
              " reverse: %g" % (repr(self), d_f, d_r))
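
# Standalone sketch of the orthogonal-Procrustes core that _train() above
# implements (the params.svd == 'numpy' branch, without reflection handling).
# The names X, Y, and procrustes_fit are local to this example and not part of
# the mapper's API; it only illustrates the SVD-based solution and the
# ss * norms[1] / norms[0] scale estimate.
import numpy as np

def procrustes_fit(X, Y):
    """Return orthogonal T and scale so that scale * (X - mean) . T approximates (Y - mean)."""
    Xc = X - X.mean(axis=0)
    Yc = Y - Y.mean(axis=0)
    nx = np.linalg.norm(Xc)                   # Frobenius norms, as in norms[] above
    ny = np.linalg.norm(Yc)
    Xn, Yn = Xc / nx, Yc / ny
    U, s, Vh = np.linalg.svd(np.dot(Yn.T, Xn), full_matrices=False)
    T = np.dot(Vh.T, U.T)                     # optimal orthogonal map (may include a reflection)
    scale = s.sum() * ny / nx                 # same form as ss * norms[1] / norms[0]
    return T, scale

rng = np.random.RandomState(0)
X = rng.randn(20, 3)
R, _ = np.linalg.qr(rng.randn(3, 3))          # a random orthogonal "ground truth"
Y = 2.5 * np.dot(X, R)
T, scale = procrustes_fit(X, Y)
print(np.allclose(scale * np.dot(X - X.mean(0), T), Y - Y.mean(0)))  # ~True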
def _train(self, source):
    params = self.params
    # Since it is unsupervised, we don't care about labels
    datas = ()
    odatas = ()
    means = ()
    shapes = ()

    assess_residuals = __debug__ and 'MAP_' in debug.active

    target = source.sa[self.get_space()].value

    for i, ds in enumerate((source, target)):
        if is_datasetlike(ds):
            data = np.asarray(ds.samples)
        else:
            data = ds
        if assess_residuals:
            odatas += (data, )
        if self._demean:
            if i == 0:
                mean = self._offset_in
            else:
                mean = data.mean(axis=0)
            data = data - mean
        else:
            # no demeaning === zero means
            mean = np.zeros(shape=data.shape[1:])
        means += (mean, )
        datas += (data, )
        shapes += (data.shape, )

    # shortcuts for sizes
    sn, sm = shapes[0]
    tn, tm = shapes[1]

    # Check the sizes
    if sn != tn:
        raise ValueError, "Data for both spaces should have the same " \
              "number of samples. Got %d in source and %d in target space" \
              % (sn, tn)

    # Sums of squares
    ssqs = [np.sum(d**2, axis=0) for d in datas]

    # XXX check for being invariant?
    #     needs to be tuned up properly and not raise but handle
    for i in xrange(2):
        if np.all(ssqs[i] <= np.abs((np.finfo(datas[i].dtype).eps
                                     * sn * means[i])**2)):
            raise ValueError, "For now do not handle invariant in time datasets"

    norms = [np.sqrt(np.sum(ssq)) for ssq in ssqs]
    normed = [data / norm for (data, norm) in zip(datas, norms)]

    # add new blank dimensions to source space if needed
    if sm < tm:
        normed[0] = np.hstack((normed[0], np.zeros((sn, tm - sm))))

    if sm > tm:
        if params.reduction:
            normed[1] = np.hstack((normed[1], np.zeros((sn, sm - tm))))
        else:
            raise ValueError, "reduction=False, so mapping from " \
                  "higher dimensionality " \
                  "source space is not supported. Source space had %d " \
                  "while target %d dimensions (features)" % (sm, tm)

    source, target = normed
    if params.oblique:
        # Just do silly linear system of equations ;) or naive
        # inverse problem
        if sn == sm and tm == 1:
            T = np.linalg.solve(source, target)
        else:
            T = np.linalg.lstsq(source, target,
                                rcond=params.oblique_rcond)[0]
        ss = 1.0
    else:
        # Orthogonal transformation
        # figure out optimal rotation
        if params.svd == 'numpy':
            U, s, Vh = np.linalg.svd(np.dot(target.T, source),
                                     full_matrices=False)
        elif params.svd == 'scipy':
            # would raise exception if not present
            externals.exists('scipy', raise_=True)
            import scipy
            U, s, Vh = scipy.linalg.svd(np.dot(target.T, source),
                                        full_matrices=False)
        elif params.svd == 'dgesvd':
            from mvpa2.support.lapack_svd import svd as dgesvd
            U, s, Vh = dgesvd(np.dot(target.T, source),
                              full_matrices=True, algo='svd')
        else:
            raise ValueError('Unknown type of svd %r' % (params.svd))
        T = np.dot(Vh.T, U.T)

        if not params.reflection:
            # then we need to assure that it is only rotation
            # "recipe" from
            # http://en.wikipedia.org/wiki/Orthogonal_Procrustes_problem
            # for more and info and original references, see
            # http://dx.doi.org/10.1007%2FBF02289451
            s_new = np.ones_like(s)
            s_new[-1] = np.linalg.det(T)
            T = np.dot(Vh.T * s_new, U.T)

        # figure out scale and final translation
        if not params.reflection:
            ss = np.sum(s_new * s)
        else:
            ss = np.sum(s)

    # if we were to collect standardized distance
    # std_d = 1 - sD**2

    # select out only relevant dimensions
    if sm != tm:
        T = T[:sm, :tm]

    self._scale = scale = ss * norms[1] / norms[0]

    # Assign projection
    if self.params.scaling:
        proj = scale * T
    else:
        proj = T
    self._proj = proj

    if self._demean:
        self._offset_out = means[1]

    if __debug__ and 'MAP_' in debug.active:
        # compute the residuals
        res_f = self.forward(odatas[0])
        d_f = np.linalg.norm(odatas[1] - res_f) / np.linalg.norm(odatas[1])
        res_r = self.reverse(odatas[1])
        d_r = np.linalg.norm(odatas[0] - res_r) / np.linalg.norm(odatas[0])
        debug('MAP_', "%s, residuals are forward: %g,"
              " reverse: %g" % (repr(self), d_f, d_r))
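
# Sketch of the rotation-only ("no reflection") correction used by the second
# _train() variant above: if the unconstrained Procrustes solution T = Vh.T U.T
# has det(T) = -1, the sign of the weakest singular direction is flipped to get
# the nearest proper rotation, and the scale term uses sum(s_new * s). The name
# rotation_only is local to this example and not part of the mapper's API.
import numpy as np

def rotation_only(U, s, Vh):
    """Given the SVD of target.T.dot(source), return a det=+1 map and its 'ss' term."""
    T = np.dot(Vh.T, U.T)
    s_new = np.ones_like(s)
    s_new[-1] = np.linalg.det(T)       # +1 or -1; -1 means T contained a reflection
    T_rot = np.dot(Vh.T * s_new, U.T)  # Vh.T diag(s_new) U.T, as in the code above
    ss = np.sum(s_new * s)             # trace term feeding the scale estimate
    return T_rot, ss

# Example: a pure 2-D reflection comes back as a proper rotation (det = +1).
X = np.eye(2)
Y = np.array([[1., 0.], [0., -1.]])    # reflection of X across the x-axis
U, s, Vh = np.linalg.svd(np.dot(Y.T, X), full_matrices=False)
T_rot, ss = rotation_only(U, s, Vh)
print(np.round(np.linalg.det(T_rot)))  # 1.0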
def ridge_corr(Rstim, Pstim, Rresp, Presp, alphas, normalpha=False, corrmin=0.2,
               singcutoff=1e-10, use_corr=True, logger=ridge_logger):
    """Uses ridge regression to find a linear transformation of [Rstim] that approximates
    [Rresp], then tests by comparing the transformation of [Pstim] to [Presp]. This
    procedure is repeated for each regularization parameter alpha in [alphas]. The
    correlation between each prediction and each response for each alpha is returned.
    The regression weights are NOT returned, because computing the correlations without
    computing regression weights is much, MUCH faster.

    Parameters
    ----------
    Rstim : array_like, shape (TR, N)
        Training stimuli with TR time points and N features. Each feature should be
        Z-scored across time.
    Pstim : array_like, shape (TP, N)
        Test stimuli with TP time points and N features. Each feature should be
        Z-scored across time.
    Rresp : array_like, shape (TR, M)
        Training responses with TR time points and M responses (voxels, neurons,
        what-have-you). Each response should be Z-scored across time.
    Presp : array_like, shape (TP, M)
        Test responses with TP time points and M responses.
    alphas : list or array_like, shape (A,)
        Ridge parameters to be tested. Should probably be log-spaced.
        np.logspace(0, 3, 20) works well.
    normalpha : boolean
        Whether ridge parameters should be normalized by the largest singular value
        (LSV) norm of Rstim. Good for comparing models with different numbers of
        parameters.
    corrmin : float in [0..1]
        Purely for display purposes. After each alpha is tested, the number of
        responses with correlation greater than corrmin minus the number of responses
        with correlation less than negative corrmin will be printed. For long-running
        regressions this vague metric of non-centered skewness can give you a rough
        sense of how well the model is working before it's done.
    singcutoff : float
        The first step in ridge regression is computing the singular value
        decomposition (SVD) of the stimulus Rstim. If Rstim is not full rank, some
        singular values will be approximately equal to zero and the corresponding
        singular vectors will be noise. These singular values/vectors should be removed
        both for speed (the fewer multiplications the better!) and accuracy. Any
        singular values less than singcutoff will be removed.
    use_corr : boolean
        If True, this function will use correlation as its metric of model fit. If
        False, this function will instead use variance explained (R-squared) as its
        metric of model fit. For ridge regression this can make a big difference --
        highly regularized solutions will have very small norms and will thus explain
        very little variance while still leading to high correlations, as correlation
        is scale-free while R**2 is not.

    Returns
    -------
    Rcorrs : array_like, shape (A, M)
        The correlation between each predicted response and each column of Presp for
        each alpha.
    """
    ## Calculate SVD of stimulus matrix
    logger.info("Doing SVD...")
    try:
        U, S, Vh = np.linalg.svd(Rstim, full_matrices=False)
    except np.linalg.LinAlgError:
        logger.info("NORMAL SVD FAILED, trying more robust dgesvd..")
        print(Rstim.shape)
        U, S, Vh = dgesvd(Rstim, full_matrices=False, algo='svd')

    ## Truncate tiny singular values for speed
    origsize = S.shape[0]
    ngoodS = np.sum(S > singcutoff)
    nbad = origsize - ngoodS
    U = U[:, :ngoodS]
    S = S[:ngoodS]
    Vh = Vh[:ngoodS]
    logger.info("Dropped %d tiny singular values.. (U is now %s)" % (nbad, str(U.shape)))

    ## Normalize alpha by the LSV norm
    norm = S[0]
    logger.info("Training stimulus has LSV norm: %0.03f" % norm)
    if normalpha:
        nalphas = alphas * norm
    else:
        nalphas = alphas

    ## Precompute some products for speed
    UR = np.dot(U.T, Rresp)  ## Precompute this matrix product for speed
    PVh = np.dot(Pstim, Vh.T)  ## Precompute this matrix product for speed

    #Prespnorms = np.apply_along_axis(np.linalg.norm, 0, Presp)  ## Precompute test response norms
    zPresp = zs(Presp)
    #Prespvar = Presp.var(0)
    Prespvar_actual = Presp.var(0)
    Prespvar = (np.ones_like(Prespvar_actual) + Prespvar_actual) / 2.0
    logger.info("Average difference between actual & assumed Prespvar: %0.3f"
                % (Prespvar_actual - Prespvar).mean())

    Rcorrs = []  ## Holds training correlations for each alpha
    for na, a in zip(nalphas, alphas):
        #D = np.diag(S/(S**2+a**2))  ## Reweight singular vectors by the ridge parameter
        D = S / (S**2 + na**2)  ## Reweight singular vectors by the (normalized?) ridge parameter

        pred = np.dot(mult_diag(D, PVh, left=False), UR)  ## Best (1.75 seconds to prediction in test)
        # pred = np.dot(mult_diag(D, np.dot(Pstim, Vh.T), left=False), UR)  ## Better (2.0 seconds to prediction in test)

        # pvhd = reduce(np.dot, [Pstim, Vh.T, D])  ## Pretty good (2.4 seconds to prediction in test)
        # pred = np.dot(pvhd, UR)

        # wt = reduce(np.dot, [Vh.T, D, UR]).astype(dtype)  ## Bad (14.2 seconds to prediction in test)
        # wt = reduce(np.dot, [Vh.T, D, U.T, Rresp]).astype(dtype)  ## Worst
        # pred = np.dot(Pstim, wt)  ## Predict test responses

        if use_corr:
            #prednorms = np.apply_along_axis(np.linalg.norm, 0, pred)  ## Compute predicted test response norms
            #Rcorr = np.array([np.corrcoef(Presp[:,ii], pred[:,ii].ravel())[0,1]
            #                  for ii in range(Presp.shape[1])])  ## Slowly compute correlations
            #Rcorr = np.array(np.sum(np.multiply(Presp, pred), 0)).squeeze()/(prednorms*Prespnorms)  ## Efficiently compute correlations
            Rcorr = (zPresp * zs(pred)).mean(0)
        else:
            ## Compute variance explained
            resvar = (Presp - pred).var(0)
            Rsq = 1 - (resvar / Prespvar)
            Rcorr = np.sqrt(np.abs(Rsq)) * np.sign(Rsq)

        Rcorr[np.isnan(Rcorr)] = 0
        Rcorrs.append(Rcorr)

        log_template = "Training: alpha=%0.3f, mean corr=%0.5f, max corr=%0.5f, over-under(%0.2f)=%d"
        log_msg = log_template % (a,
                                  np.mean(Rcorr),
                                  np.max(Rcorr),
                                  corrmin,
                                  (Rcorr > corrmin).sum() - (-Rcorr > corrmin).sum())
        logger.info(log_msg)

    return Rcorrs
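
# Minimal usage sketch for ridge_corr() above, illustrative only. It assumes the
# module-level helpers ridge_corr relies on (numpy as np, zs, mult_diag,
# ridge_logger, and the dgesvd fallback) are available, and that features and
# responses are z-scored across time as the docstring asks. The local zscore
# helper, data sizes, and the alpha grid are arbitrary choices for this sketch.
import numpy as np

def zscore(x):
    return (x - x.mean(0)) / x.std(0)

TR, TP, N, M = 300, 100, 40, 8                # train/test time points, features, responses
rng = np.random.RandomState(0)
wt_true = rng.randn(N, M)

Rstim = zscore(rng.randn(TR, N))
Pstim = zscore(rng.randn(TP, N))
Rresp = zscore(np.dot(Rstim, wt_true) + rng.randn(TR, M))
Presp = zscore(np.dot(Pstim, wt_true) + rng.randn(TP, M))

alphas = np.logspace(0, 3, 20)                # log-spaced grid, as suggested in the docstring
Rcorrs = np.array(ridge_corr(Rstim, Pstim, Rresp, Presp, alphas))  # shape (A, M)
best_alpha_per_voxel = alphas[Rcorrs.argmax(axis=0)]               # shape (M,)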