def test_group_sparse_covariance():
    """Smoke/consistency test for group_sparse_covariance.

    Checks, in order: determinism of two identical debug-mode runs,
    iteration count and monotonically decreasing objective (via a probe
    callback), output shape, input validation, and agreement between the
    CV estimator and the plain estimator refit at the selected alpha.
    """
    # run in debug mode. Should not fail
    # without debug mode: cost must decrease.
    signals, _, _ = generate_group_sparse_gaussian_graphs(
        density=0.1, n_subjects=5, n_features=10,
        min_n_samples=100, max_n_samples=151,
        random_state=np.random.RandomState(0))

    alpha = 0.1

    # These executions must hit the tolerance limit
    emp_covs, omega = group_sparse_covariance(signals, alpha, max_iter=20,
                                              tol=1e-2, debug=True, verbose=0)
    emp_covs, omega2 = group_sparse_covariance(signals, alpha, max_iter=20,
                                               tol=1e-2, debug=True, verbose=0)

    # Same input, same parameters -> the two runs must agree (determinism).
    np.testing.assert_almost_equal(omega, omega2, decimal=4)

    class Probe(object):
        # Callable handed to group_sparse_covariance via probe_function;
        # records the penalized objective once per iteration.
        def __init__(self):
            self.objective = []

        def __call__(self, emp_covs, n_samples, alpha, max_iter, tol,
                     n, omega, omega_diff):
            # n < 0 is the pre-iteration call; skip it so that one value
            # is recorded per actual iteration.
            if n >= 0:
                _, objective = group_sparse_scores(omega, n_samples,
                                                   emp_covs, alpha)
                self.objective.append(objective)

    # Use a probe to test for number of iterations and decreasing objective.
    probe = Probe()
    emp_covs, omega = group_sparse_covariance(
        signals, alpha, max_iter=4, tol=None, verbose=0, probe_function=probe)
    objective = probe.objective
    # check number of iterations (tol=None forces exactly max_iter passes)
    assert_equal(len(objective), 4)
    # np.testing.assert_array_less is a strict comparison.
    # Zeros can occur in np.diff(objective).
    assert_true(np.all(np.diff(objective) <= 0))
    assert_equal(omega.shape, (10, 10, 5))

    # Test input argument checking
    assert_raises(ValueError, group_sparse_covariance, signals, "")
    assert_raises(ValueError, group_sparse_covariance, 1, alpha)
    # Subjects with mismatched numbers of features must be rejected.
    assert_raises(ValueError, group_sparse_covariance,
                  [np.ones((2, 2)), np.ones((2, 3))], alpha)

    # Check consistency between classes
    gsc1 = GroupSparseCovarianceCV(alphas=4, tol=1e-1, max_iter=20, verbose=0,
                                   early_stopping=True)
    gsc1.fit(signals)

    # Refitting the non-CV estimator at the CV-selected alpha must give
    # the same precision matrices.
    gsc2 = GroupSparseCovariance(alpha=gsc1.alpha_, tol=1e-1, max_iter=20,
                                 verbose=0)
    gsc2.fit(signals)

    np.testing.assert_almost_equal(gsc1.precisions_, gsc2.precisions_,
                                   decimal=4)
def create_signals(parameters, output_dir="tmp_signals"):
    """Generate (or load from cache) group-sparse ground-truth signals.

    Simple cache system: results are pickled under a directory derived
    from `parameters` so that repeated calls with the same parameters
    reuse the same ground truth.

    Parameters
    ----------
    parameters : dict
        keys: n_var, n_tasks, density (mandatory)
        min_samples, max_samples (optional)
        normalize (optional, default True)
    output_dir : str
        base directory for the cache.

    Returns
    -------
    next_num : int
        next free index for "precisions_<num>.pickle" files in the cache.
    cache_dir : str or None
        cache directory (None when caching is disabled by get_cache_dir).
    gt : dict
        keys "precisions", "topology", "signals".
    """
    cache_dir = get_cache_dir(parameters, output_dir)

    next_num = 0
    if cache_dir is not None:
        if not os.path.isdir(cache_dir):
            os.makedirs(cache_dir)
        else:
            # Find the highest existing "precisions_<num>.pickle" index.
            filenames = glob.glob(os.path.join(cache_dir,
                                               "precisions_*.pickle"))
            numbers = [int(os.path.basename(fname)
                           .rsplit(".")[0]
                           .split("_")[1])
                       for fname in filenames]
            if len(numbers) > 0:
                next_num = max(numbers) + 1

    # Look for/create true precisions, topology and signals.
    # BUGFIX: os.path.join(None, ...) raises TypeError; only build the
    # filename when a cache directory actually exists.
    ground_truth_fname = None
    if cache_dir is not None:
        ground_truth_fname = os.path.join(cache_dir, "ground_truth.pickle")

    if ground_truth_fname is None or not os.path.isfile(ground_truth_fname):
        rand_gen = np.random.RandomState(0)
        min_samples = parameters.get("min_samples", 100)
        max_samples = parameters.get("max_samples", 150)

        # Generate signals
        signals, precisions, topology = \
            testing.generate_group_sparse_gaussian_graphs(
                n_subjects=parameters["n_tasks"],
                n_features=parameters["n_var"],
                density=parameters["density"], random_state=rand_gen,
                min_n_samples=min_samples, max_n_samples=max_samples)

        if parameters.get("normalize", True):
            # Unit-variance columns, in place.
            for signal in signals:
                signal /= signal.std(axis=0)

        gt = {"precisions": precisions,
              "topology": topology,
              "signals": signals}
        if cache_dir is not None:
            # BUGFIX: use a context manager so the file handle is closed
            # (the original leaked the handle from open(..., "wb")).
            with open(ground_truth_fname, "wb") as f:
                pickle.dump(gt, f)

    if cache_dir is not None:
        # Reload from disk so callers always see the pickled round-trip.
        with open(ground_truth_fname, "rb") as f:
            gt = pickle.load(f)

    return next_num, cache_dir, gt
def cv_object_study(early_stopping=True, output_dir="_early_stopping"):
    """Convenience function for running GroupSparseCovarianceCV.

    Fits a GroupSparseCovarianceCV on either synthetic signals or cached
    region signals, prints timing and selected-alpha diagnostics, and
    pickles the results into `output_dir`.

    Parameters
    ----------
    early_stopping : bool
        forwarded to GroupSparseCovarianceCV.
    output_dir : str
        directory receiving "cv_object_study.pickle" (created if missing).
    """
    parameters = {'n_tasks': 10, 'tol': 1e-3, 'max_iter': 50, "n_jobs": 7,
                  "cv": 4}
    parameters["tol_cv"] = parameters["tol"]
    parameters["max_iter_cv"] = parameters["max_iter"]

    synthetic = False

    print("-- Getting signals")
    if synthetic:
        parameters["n_features"] = 50
        parameters["density"] = 0.2
        signals, _, _ = testing.generate_group_sparse_gaussian_graphs(
            n_subjects=parameters["n_tasks"],
            n_features=parameters["n_features"],
            min_n_samples=100, max_n_samples=150,
            density=parameters["density"])
    else:
        # Memoize the (presumably expensive) per-subject signal extraction.
        mem = joblib.Memory(".")
        signals = []
        for n in range(parameters["n_tasks"]):
            signals.append(mem.cache(region_signals)(n))

    print("-- Optimizing")
    gsc = GroupSparseCovarianceCV(early_stopping=early_stopping,
                                  cv=parameters["cv"],
                                  n_jobs=parameters["n_jobs"],
                                  tol=parameters["tol"],
                                  tol_cv=parameters["tol_cv"],
                                  max_iter=parameters["max_iter"],
                                  max_iter_cv=parameters["max_iter_cv"],
                                  verbose=1)
    t0 = time.time()
    gsc.fit(signals)
    t1 = time.time()
    print("\nTime spent in fit(): %.1f s" % (t1 - t0))
    print("\n-- selected alpha: %.3e" % gsc.alpha_)
    print("-- cv_alphas_:")
    print(repr(np.asarray(gsc.cv_alphas_)))
    print("-- cv_scores_:")
    print(repr(np.asarray(gsc.cv_scores_)))

    # Robustness: make sure the output directory exists before writing.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    out_filename = os.path.join(output_dir, "cv_object_study.pickle")
    # BUGFIX: context manager ensures the pickle file is flushed and closed
    # (the original passed an unclosed open(..., "wb") handle).
    with open(out_filename, "wb") as f:
        pickle.dump([gsc.alpha_, gsc.cv_alphas_, gsc.cv_scores_,
                     gsc.covariances_, gsc.precisions_], f)
import matplotlib.pyplot as plt


def plot_matrix(m, ylabel=""):
    """Display matrix `m` with a symmetric diverging color scale.

    The color limits are set to +/- max(|m|) so that zero maps to the
    center of the RdBu_r colormap.
    NOTE(review): the `ylabel` parameter is accepted but never used in
    this function body — labels are set by the caller below; confirm
    whether it can be removed.
    """
    abs_max = abs(m).max()
    plt.imshow(m, cmap=plt.cm.RdBu_r, interpolation="nearest",
               vmin=-abs_max, vmax=abs_max)


# Generate synthetic data
from nilearn._utils.testing import generate_group_sparse_gaussian_graphs

n_subjects = 20  # number of subjects
n_displayed = 3  # number of subjects displayed
subjects, precisions, topology = generate_group_sparse_gaussian_graphs(
    n_subjects=n_subjects, n_features=10, min_n_samples=30, max_n_samples=50,
    density=0.1)

# Left-hand column of a (n_displayed x 4) grid shows the ground-truth
# precision matrix for each displayed subject.
fig = plt.figure(figsize=(10, 7))
plt.subplots_adjust(hspace=0.4)
for n in range(n_displayed):
    plt.subplot(n_displayed, 4, 4 * n + 1)
    plot_matrix(precisions[n])
    if n == 0:
        plt.title("ground truth")
    plt.ylabel("subject %d" % n)


# Run group-sparse covariance on all subjects
from nilearn.group_sparse_covariance import GroupSparseCovarianceCV
# NOTE(review): `gsc` is constructed but not fitted within this chunk —
# presumably gsc.fit(subjects) follows beyond the visible excerpt.
gsc = GroupSparseCovarianceCV(max_iter=50, verbose=1)
def benchmark1():
    """Plot different quantities for varying alpha.

    Generates group-sparse synthetic signals, fits precision matrices on a
    reversed log-spaced grid of alpha values (in parallel), scores each fit
    on independently generated test signals, and plots score and sparsity
    curves against alpha.
    """
    # Signals
    min_samples, max_samples = 100, 150  # train signals length
    n_var = 50
    n_tasks = 40
    density = 0.1

    random_state = np.random.RandomState(0)

    test_samples = 4000  # number of samples for test signals

    # Estimation
    n_alphas = 10
    max_iter = 200
    tol = 1e-3

    # Generate signals
    signals, precisions, topology = \
        testing.generate_group_sparse_gaussian_graphs(
            n_subjects=n_tasks, n_features=n_var, density=density,
            random_state=random_state, min_n_samples=min_samples,
            max_n_samples=max_samples)
    emp_covs, n_samples = empirical_covariances(signals)

    # Estimate precision matrices
    # Grid spans two decades below alpha_max, largest alpha first
    # (the [::-1] reversal), so estimation starts from the sparsest model.
    alpha_1, _ = compute_alpha_max(emp_covs, n_samples)
    alpha_0 = 1e-2 * alpha_1
    ## alpha_1 = 0.067
    ## alpha_0 = 0.044
    alphas = np.logspace(np.log10(alpha_0), np.log10(alpha_1), n_alphas)[::-1]

    # One group_sparse_covariance fit per alpha, 7 processes in parallel.
    # NOTE(review): the results are indexed like dicts below
    # (params["precisions"], params["alpha"]) — assumes this version of
    # group_sparse_covariance returns a parameter dict; confirm.
    parameters = joblib.Parallel(n_jobs=7, verbose=1)(
        joblib.delayed(group_sparse_covariance)(emp_covs, n_samples, alpha,
                                                max_iter=max_iter, tol=tol)
        for alpha in alphas)

    # Compute scores
    test_signals = testing.generate_signals_from_precisions(
        precisions, min_n_samples=test_samples,
        max_n_samples=test_samples + 1, random_state=random_state)
    test_emp_covs, _ = empirical_covariances(test_signals)
    del test_signals  # free memory: only the covariances are needed

    for params in parameters:
        params["ll_score"], params["pen_score"] = group_sparse_scores(
            params["precisions"], n_samples, test_emp_covs,
            params["alpha"])

    # Plot graphs
    alpha, ll_score, pen_score = get_series(
        parameters, ("alpha", "ll_score", "pen_score"))
    # Number of non-zero coefficients in the first subject's precision
    # matrix, used as a sparsity measure.
    non_zero = [(p["precisions"][..., 0] != 0).sum() for p in parameters]

    pl.figure()
    pl.semilogx(alpha, ll_score, "-+", label="log-likelihood")
    pl.semilogx(alpha, pen_score, "-+", label="penalized LL")
    pl.xlabel("alpha")
    pl.ylabel("score")
    pl.grid()

    pl.figure()
    pl.semilogx(alpha, non_zero, "-+")
    pl.xlabel("alpha")
    pl.ylabel("non_zero")
    pl.grid()

    pl.figure()
    pl.loglog(alpha, non_zero, "-+")
    pl.xlabel("alpha")
    pl.ylabel("non_zero")
    pl.grid()

    pl.figure()
    pl.imshow(topology, interpolation="nearest")
    pl.title("true topology")

    ## precisions = get_series(parameters, ("precisions", ))
    ## for prec, alpha in zip(precisions, alpha):
    ##     pl.figure()
    ##     pl.imshow(prec[..., 0] != 0, interpolation="nearest")
    ##     pl.title(alpha)

    pl.show()
# NOTE(review): this chunk appears to be a truncated duplicate of the example
# script above. The first two statements are the body of plot_matrix without
# its `def plot_matrix(m, ylabel=""):` header, so `m` is unbound here —
# this fragment cannot run as-is; confirm whether it should be deleted or
# whether the missing header was lost in extraction.
abs_max = abs(m).max()
plt.imshow(m, cmap=plt.cm.RdBu_r, interpolation="nearest",
           vmin=-abs_max, vmax=abs_max)


# Generate synthetic data
from nilearn._utils.testing import generate_group_sparse_gaussian_graphs

n_subjects = 20  # number of subjects
n_displayed = 3  # number of subjects displayed
subjects, precisions, topology = generate_group_sparse_gaussian_graphs(
    n_subjects=n_subjects, n_features=10, min_n_samples=30, max_n_samples=50,
    density=0.1)

# Ground-truth precision matrices in the first column of a
# (n_displayed x 4) subplot grid.
fig = plt.figure(figsize=(10, 7))
plt.subplots_adjust(hspace=0.4)
for n in range(n_displayed):
    plt.subplot(n_displayed, 4, 4 * n + 1)
    plot_matrix(precisions[n])
    if n == 0:
        plt.title("ground truth")
    plt.ylabel("subject %d" % n)


# Run group-sparse covariance on all subjects
from nilearn.group_sparse_covariance import GroupSparseCovarianceCV
gsc = GroupSparseCovarianceCV(max_iter=50, verbose=1)