def fit(self, X, y=None):
    """Fit PoSCE to the given time series for each subject

    Parameters
    ----------
    X : list of n_subjects numpy.ndarray, shapes (n_samples, n_features)
        The input subjects time series. The number of samples may differ
        from one subject to another

    y : ignored
        Not used by this method.

    Returns
    -------
    self : PopulationShrunkCovariance instance
        The object itself. Useful for chaining operations.

    Raises
    ------
    ValueError
        If ``prior_mean_type`` is neither "geometric" nor "empirical".
    """
    # Validate the parameter first so that an invalid value fails before
    # any covariance is estimated.
    if self.prior_mean_type not in ("geometric", "empirical"):
        raise ValueError(
            "Allowed mean types are "
            '"geometric", "empirical"'
            ', got type "{}"'.format(self.prior_mean_type))

    # compute covariances from timeseries
    self.cov_estimator_ = clone(self.cov_estimator)
    covariances = [self.cov_estimator_.fit(x).covariance_ for x in X]

    # compute prior mean
    if self.prior_mean_type == "geometric":
        self.prior_mean_ = _geometric_mean(covariances, max_iter=30,
                                           tol=1e-7)
    else:
        self.prior_mean_ = np.mean(covariances, axis=0)

    # Both matrices are derived from the prior mean through
    # ``_map_eigenvalues`` with x -> 1 / sqrt(x) and x -> sqrt(x).
    self.prior_whitening_ = _map_eigenvalues(lambda x: 1.0 / np.sqrt(x),
                                             self.prior_mean_)
    self.prior_whitening_inv_ = _map_eigenvalues(lambda x: np.sqrt(x),
                                                 self.prior_mean_)

    # compute the population prior dispersion
    connectivities = [
        _map_eigenvalues(
            np.log,
            self.prior_whitening_.dot(cov).dot(self.prior_whitening_))
        for cov in covariances
    ]
    connectivities = np.array(connectivities)
    connectivities = sym_matrix_to_vec(connectivities)
    # Average of the outer products of the vectorized connectivities.
    self.prior_cov_ = np.mean(
        [np.outer(c, c) for c in connectivities],
        axis=0,
    )

    # approximate the population prior dispersion
    self.prior_cov_approx_ = regularized_eigenvalue_decomposition(
        self.prior_cov_, explained_variance_threshold=0.7)
    return self
def test_geometric_mean_diagonal():
    """Check ``_geometric_mean`` on diagonal matrices.

    The expected value is the element-wise product of the matrices raised
    to the power ``1 / n_matrices``.
    """
    n_matrices = 20
    n_features = 5
    diags = []
    for k in range(n_matrices):
        large_idx = k % n_features
        small_idx = (n_features - 1) // (k + 1)
        matrix = np.eye(n_features)
        # Order matters: when both indices coincide the small value wins.
        matrix[large_idx, large_idx] = 1e4 + k
        matrix[small_idx, small_idx] = (k + 1) * 1e-4
        diags.append(matrix)
    exponent = 1 / float(len(diags))
    expected = np.prod(np.array(diags), axis=0) ** exponent
    assert_array_almost_equal(_geometric_mean(diags), expected)
def test_geometric_mean_couple():
    """Check ``_geometric_mean`` of two matrices against an explicit formula.

    The expected value is ``S.dot(f(W.dot(A).dot(W))).dot(S)`` where ``A`` is
    the first matrix, ``S`` and ``W`` are built from the eigendecomposition
    of the second matrix with ``_form_symmetric``, and ``f`` is
    ``_map_eigenvalues`` with ``np.sqrt``.
    """
    n_features = 7
    all_ones = np.ones((n_features, n_features))
    first = all_ones.dot(all_ones) + n_features * np.eye(n_features)
    lower = np.tril(all_ones)
    second = lower.dot(lower.T)

    eigvals, eigvecs = np.linalg.eigh(second)
    second_sqrt = _form_symmetric(np.sqrt, eigvals, eigvecs)
    second_inv_sqrt = _form_symmetric(np.sqrt, 1. / eigvals, eigvecs)

    whitened = second_inv_sqrt.dot(first).dot(second_inv_sqrt)
    expected = second_sqrt.dot(
        _map_eigenvalues(np.sqrt, whitened)).dot(second_sqrt)
    assert_array_almost_equal(_geometric_mean([first, second]), expected)
def test_geometric_mean_geodesic():
    """Check ``_geometric_mean`` on matrices generated along one curve.

    Each matrix is ``P.dot(exp_map(t * sym)).dot(P.T)`` for ``t`` in
    ``0 .. n_matrices - 1``; the expected mean is the same expression
    evaluated at the average of the ``t`` values.
    """
    n_matrices = 10
    n_features = 6

    direction = np.arange(n_features)
    unit = direction / np.linalg.norm(direction)
    sym = unit * unit[:, np.newaxis]

    times = np.arange(n_matrices)
    non_singular = np.eye(n_features)
    non_singular[1:3, 1:3] = np.array([[-1, -.5], [-.5, -1]])

    def _on_curve(t):
        # Matrix associated with parameter ``t``.
        return non_singular.dot(
            _map_eigenvalues(np.exp, t * sym)).dot(non_singular.T)

    spds = [_on_curve(t) for t in times]
    expected = _on_curve(times.mean())
    assert_array_almost_equal(_geometric_mean(spds), expected)
def map_tangent(data, diag=False):
    """Project matrices to the tangent space at their geometric mean.

    The geometric mean of ``data`` is computed, each matrix is whitened on
    both sides by the mean mapped through ``x -> 1 / sqrt(x)`` on its
    eigenvalues, and ``np.log`` is then mapped on the eigenvalues of the
    result. The stacked matrices are finally vectorized.

    Parameters
    ----------
    data: list of numpy.ndarray of shape(n_features, n_features)
        List of symmetric matrices.
        NOTE(review): the log / inverse square root presumably require
        strictly positive eigenvalues -- confirm against callers.

    diag: bool
        Whether to discard the diagonal elements before vectorizing.
        Default is False.

    Returns
    -------
    tangent: numpy.ndarray
        Output of ``sym_matrix_to_vec`` applied to the stacked tangent
        matrices. Presumably one row per input matrix; exact shape depends
        on ``sym_matrix_to_vec`` and ``diag`` -- TODO confirm.
    """
    def _inverse_sqrt(eigenvalues):
        return 1. / np.sqrt(eigenvalues)

    reference = _geometric_mean(data, max_iter=30, tol=1e-7)
    whitener = _map_eigenvalues(_inverse_sqrt, reference)
    projected = np.array([
        _map_eigenvalues(np.log, whitener.dot(matrix).dot(whitener))
        for matrix in data
    ])
    return sym_matrix_to_vec(projected, discard_diagonal=diag)
def test_geometric_mean_properties():
    """Check generic properties of ``_geometric_mean``.

    Covers: inputs left untouched, SPD output, invariance under
    reordering, congruent transformation and inversion, decreasing
    gradient norm, the non-convergence warning, and a run on badly
    conditioned inputs.
    """
    n_matrices = 40
    n_features = 15
    # NOTE(review): every call uses random_state=0, so the generated
    # matrices are presumably all identical -- confirm this is intended.
    spds = []
    for k in range(n_matrices):
        spds.append(random_spd(n_features, eig_min=1., cond=10.,
                               random_state=0))
    # Deep copy so the reference arrays are independent objects; a shallow
    # copy would share the arrays and the mutation check below could
    # never fail.
    input_spds = copy.deepcopy(spds)
    gmean = _geometric_mean(spds)

    # Generic
    assert isinstance(spds, list)
    for spd, input_spd in zip(spds, input_spds):
        assert_array_equal(spd, input_spd)
    assert is_spd(gmean, decimal=7)

    # Invariance under reordering
    spds.reverse()
    spds.insert(0, spds[1])
    spds.pop(2)
    assert_array_almost_equal(_geometric_mean(spds), gmean)

    # Invariance under congruent transformation
    non_singular = random_non_singular(n_features, random_state=0)
    spds_cong = [non_singular.dot(spd).dot(non_singular.T) for spd in spds]
    assert_array_almost_equal(_geometric_mean(spds_cong),
                              non_singular.dot(gmean).dot(non_singular.T))

    # Invariance under inversion
    spds_inv = [linalg.inv(spd) for spd in spds]
    init = linalg.inv(np.mean(spds, axis=0))
    assert_array_almost_equal(_geometric_mean(spds_inv, init=init),
                              linalg.inv(gmean))

    # Gradient norm is decreasing
    grad_norm = grad_geometric_mean(spds, tol=1e-20)
    difference = np.diff(grad_norm)
    assert np.amax(difference) <= 0.

    # Check that exactly one warning is emitted when the gradient norm at
    # the last step is still greater than the tolerance
    max_iter = 1
    tol = 1e-20
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        gmean = _geometric_mean(spds, max_iter=max_iter, tol=tol)
        assert len(w) == 1
    grad_norm = grad_geometric_mean(spds, max_iter=max_iter, tol=tol)
    assert len(grad_norm) == max_iter
    assert grad_norm[-1] > tol

    # Evaluate convergence. A warning is printed if tolerance is not reached
    for p in [.5, 1.]:  # proportion of badly conditioned matrices
        spds = []
        for k in range(int(p * n_matrices)):
            spds.append(random_spd(n_features, eig_min=1e-2, cond=1e6,
                                   random_state=0))
        for k in range(int(p * n_matrices), n_matrices):
            spds.append(random_spd(n_features, eig_min=1., cond=10.,
                                   random_state=0))
        if p < 1:
            max_iter = 30
        else:
            max_iter = 60
        # Only checks that the call completes; the result is not inspected.
        gmean = _geometric_mean(spds, max_iter=max_iter, tol=1e-5)
def test_geometric_mean_properties():
    """Check generic properties of ``_geometric_mean``.

    Covers: inputs left untouched, SPD output, invariance under
    reordering, congruent transformation and inversion, decreasing
    gradient norm, the non-convergence warning, and a run on badly
    conditioned inputs.
    """
    n_matrices = 40
    n_features = 15
    # NOTE(review): every call uses random_state=0, so the generated
    # matrices are presumably all identical -- confirm this is intended.
    spds = []
    for k in range(n_matrices):
        spds.append(random_spd(n_features, eig_min=1., cond=10.,
                               random_state=0))
    # Deep copy so the reference arrays are independent objects; a shallow
    # copy would share the arrays and the mutation check below could
    # never fail.
    input_spds = copy.deepcopy(spds)
    gmean = _geometric_mean(spds)

    # Generic
    assert isinstance(spds, list)
    for spd, input_spd in zip(spds, input_spds):
        assert_array_equal(spd, input_spd)
    assert is_spd(gmean, decimal=7)

    # Invariance under reordering
    spds.reverse()
    spds.insert(0, spds[1])
    spds.pop(2)
    assert_array_almost_equal(_geometric_mean(spds), gmean)

    # Invariance under congruent transformation
    non_singular = random_non_singular(n_features, random_state=0)
    spds_cong = [non_singular.dot(spd).dot(non_singular.T) for spd in spds]
    assert_array_almost_equal(_geometric_mean(spds_cong),
                              non_singular.dot(gmean).dot(non_singular.T))

    # Invariance under inversion
    spds_inv = [linalg.inv(spd) for spd in spds]
    init = linalg.inv(np.mean(spds, axis=0))
    assert_array_almost_equal(_geometric_mean(spds_inv, init=init),
                              linalg.inv(gmean))

    # Gradient norm is decreasing
    grad_norm = grad_geometric_mean(spds, tol=1e-20)
    difference = np.diff(grad_norm)
    assert np.amax(difference) <= 0.

    # Check that exactly one warning is emitted when the gradient norm at
    # the last step is still greater than the tolerance
    max_iter = 1
    tol = 1e-20
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        gmean = _geometric_mean(spds, max_iter=max_iter, tol=tol)
        assert len(w) == 1
    grad_norm = grad_geometric_mean(spds, max_iter=max_iter, tol=tol)
    assert len(grad_norm) == max_iter
    assert grad_norm[-1] > tol

    # Evaluate convergence. A warning is printed if tolerance is not reached
    for p in [.5, 1.]:  # proportion of badly conditioned matrices
        spds = []
        for k in range(int(p * n_matrices)):
            spds.append(random_spd(n_features, eig_min=1e-2, cond=1e6,
                                   random_state=0))
        for k in range(int(p * n_matrices), n_matrices):
            spds.append(random_spd(n_features, eig_min=1., cond=10.,
                                   random_state=0))
        if p < 1:
            max_iter = 30
        else:
            max_iter = 60
        # Only checks that the call completes; the result is not inspected.
        gmean = _geometric_mean(spds, max_iter=max_iter, tol=1e-5)