def benchmark1():
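    """Run save_group_sparse_covariance over a descending grid of penalties,
    caching each result (relies on a module-level `output_dir`)."""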
    parameters = dict(n_var=200,
                      n_tasks=5,
                      density=0.15,

                      tol=1e-2,
                      n_alphas=5,
                      max_iter=50,
                      min_samples=100,
                      max_samples=150)

    next_num, cache_dir, gt = create_signals(parameters, output_dir=output_dir)

    emp_covs, n_samples = empirical_covariances(gt['signals'])
    max_alpha, _ = compute_alpha_max(emp_covs, n_samples)

    min_alpha = max_alpha / 100.
    print("alpha range: %.3e - %.3e" % (min_alpha, max_alpha))
    alphas = np.logspace(np.log10(min_alpha), np.log10(max_alpha),
                         parameters['n_alphas'])[::-1]

    joblib.Parallel(n_jobs=1, verbose=1)(
        joblib.delayed(save_group_sparse_covariance)(
            emp_covs, n_samples, alpha, max_iter=parameters['max_iter'],
            tol=parameters['tol'], debug=False, cache_dir=cache_dir, num=num)
        for alpha, num in zip(alphas, itertools.count(next_num)))
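

# Standalone sketch of the descending penalty grid built in benchmark1()
# above, factored into a helper for readability. The helper name is
# illustrative and not part of the original module.
def _descending_alpha_grid(alpha_max, n_alphas, ratio=100.):
    """Return n_alphas penalties log-spaced from alpha_max down to
    alpha_max / ratio, largest first."""
    return np.logspace(np.log10(alpha_max / ratio), np.log10(alpha_max),
                       n_alphas)[::-1]
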
def sample_precision_space(parameters, number=100):
    """Launch a large number of times the same estimation, with different
    starting points.

    number: int
        number of samples to generate.
    """
    # Estimation
    max_iter = 200

    # Generate signals
    next_num, cache_dir, gt = create_signals(parameters,
                                             output_dir="_gsc_sensitivity")
    precisions, topology, signals = (gt["precisions"], gt["topology"],
                                     gt["signals"])

    emp_covs, n_samples = empirical_covariances(signals)

    print("alpha max: %.3e" % compute_alpha_max(emp_covs, n_samples)[0])

    # Estimate a lot of precision matrices
    parameters = joblib.Parallel(n_jobs=7, verbose=1)(
        joblib.delayed(save_group_sparse_covariance)(
            emp_covs, n_samples, parameters["alpha"], max_iter=max_iter,
            tol=parameters["tol"], cache_dir=cache_dir, num=n)
        for n in range(next_num, next_num + number))
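

# Hedged usage sketch for sample_precision_space(). The values are
# illustrative only: the dict must provide at least the keys consumed above
# ("alpha", "tol") plus whatever create_signals() expects (here, the keys used
# by the other benchmarks in this file).
def _example_sample_precision_space():
    parameters = {'n_var': 50, 'n_tasks': 10, 'density': 0.15,
                  'alpha': .01, 'tol': 1e-3}
    sample_precision_space(parameters, number=20)
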
def benchmark1(output_dir="_prof_group_sparse_covariance"):
    """Run group_sparse_covariance on a simple case, for benchmarking."""
    parameters = {'n_tasks': 40, 'n_var': 30, 'density': 0.15,
                  'alpha': .01, 'tol': 1e-4, 'max_iter': 50}

    _, _, gt = create_signals(parameters,
                              output_dir=output_dir)

    _, est_precs = utils.timeit(group_sparse_covariance)(
        gt["signals"], parameters['alpha'], max_iter=parameters['max_iter'],
        tol=parameters['tol'], verbose=1, debug=False)

    # Check that output doesn't change between invocations.
    utils.cache_array(est_precs, os.path.join(output_dir,
                                              "benchmark1_est_precs.npy"),
                      decimal=4)
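

# Hedged sketch: how the cached reference written above could be inspected by
# hand, assuming utils.cache_array() stores a plain .npy file at the given
# path (an assumption about its implementation).
def _load_benchmark1_reference(output_dir="_prof_group_sparse_covariance"):
    return np.load(os.path.join(output_dir, "benchmark1_est_precs.npy"))
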
def benchmark(parameters, output_d="_convergence"):
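    """Run a single group-sparse estimation and record convergence metrics
    (log-likelihood, objective, duality gap, wall-clock time) with a
    ScoreProbe."""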
    _, _, gt = create_signals(parameters, output_dir=output_d)

    emp_covs, n_samples = empirical_covariances(gt["signals"])
    print("alpha_max: %.3e, %.3e" % compute_alpha_max(emp_covs, n_samples))

    sp = ScoreProbe(duality_gap=True)
    _group_sparse_covariance(
        emp_covs, n_samples, alpha=parameters["alpha"], tol=parameters["tol"],
        max_iter=parameters["max_iter"], probe_function=sp, verbose=1)

    return {"log_lik": np.asarray(sp.log_lik),
            "objective": np.asarray(sp.objective),
            "precisions": np.asarray(sp.precisions),
            "duality_gap": np.asarray(sp.duality_gap),
            "time": np.asarray(sp.wall_clock)}, gt
def benchmark2(output_dir="_prof_group_sparse_covariance"):
    """Run GroupSparseCovarianceCV on a simple case, for benchmarking."""
    parameters = {'n_tasks': 40, 'n_var': 10, 'density': 0.15,
                  'alphas': 4, 'tol': 1e-4, 'max_iter': 50}
    parameters["tol_cv"] = parameters["tol"]
    parameters["max_iter_cv"] = parameters["max_iter"]
    _, _, gt = create_signals(parameters,
                              output_dir=output_dir)

    gsc = GroupSparseCovarianceCV(alphas=parameters['alphas'],
                                  max_iter=parameters['max_iter'],
                                  tol=parameters['tol'],
                                  max_iter_cv=parameters['max_iter_cv'],
                                  tol_cv=parameters['tol_cv'],
                                  verbose=1, debug=False,
                                  early_stopping=True)
    utils.timeit(gsc.fit)(gt["signals"])
    print("selected alpha: %.3e" % gsc.alpha_)
    utils.cache_array(gsc.precisions_,
                      os.path.join(output_dir,
                      "est_precs_cv_{n_var:d}.npy".format(**parameters)),
                      decimal=3)


def lasso_gsc_comparison():
    """Check that graph lasso and group-sparse covariance give the same
    output for a single task."""
    from sklearn.covariance import graph_lasso, empirical_covariance

    parameters = {'n_tasks': 1, 'n_var': 20, 'density': 0.15,
                  'rho': .2, 'tol': 1e-4, 'max_iter': 50}

    _, _, gt = create_signals(parameters, output_dir=output_dir)
    signals = gt["signals"]

    _, gsc_precision = utils.timeit(group_sparse_covariance)(
        signals, parameters['rho'], max_iter=parameters['max_iter'],
        tol=parameters['tol'], verbose=1, debug=False)

    emp_cov = empirical_covariance(signals[0])
    _, gl_precision = utils.timeit(graph_lasso)(
        emp_cov, parameters['rho'], tol=parameters['tol'],
        max_iter=parameters['max_iter'])

    np.testing.assert_almost_equal(gl_precision, gsc_precision[..., 0],
                                   decimal=4)


def singular_cov_case():
    """Check behaviour of algorithm for singular input matrix."""
    parameters = {'n_tasks': 10, 'n_var': 40, 'density': 0.15,
                  'rho': .1, 'tol': 1e-2, 'max_iter': 50,
                  'min_samples': 10, 'max_samples': 15}

    _, _, gt = create_signals(parameters, output_dir=output_dir)
    signals = gt["signals"]

    emp_covs, _ = empirical_covariances(signals)

    # Check that all covariance matrices are singular: each task has fewer
    # samples (between 10 and 15) than variables (40), so the empirical
    # covariances are rank-deficient and their smallest eigenvalue is
    # numerically zero.
    eps = np.finfo(float).eps
    for k in range(emp_covs.shape[-1]):
        eigvals = np.linalg.eigvalsh(emp_covs[..., k])
        assert abs(eigvals.min()) <= 50 * eps

    _, gsc_precisions = utils.timeit(group_sparse_covariance)(
        signals, parameters['rho'], max_iter=parameters['max_iter'],
        tol=parameters['tol'], verbose=1, debug=False)

    print('found sparsity: {0:.3f}'.format(
        1. * (gsc_precisions[..., 0] != 0).sum()
        / gsc_precisions.shape[0] ** 2))
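

# Hedged helper sketch, generalizing the sparsity printout above: fraction of
# non-zero entries in each task's estimated precision matrix. `precisions` is
# assumed to be shaped (n_var, n_var, n_tasks), as elsewhere in this file.
def _sparsity_per_task(precisions):
    n_var = precisions.shape[0]
    return [1. * (precisions[..., k] != 0).sum() / n_var ** 2
            for k in range(precisions.shape[-1])]
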
def benchmark3():
    """Compare group_sparse_covariance result for different initializations.
    """
    ## parameters = {'n_tasks': 10, 'n_var': 50, 'density': 0.15,
    ##               'alpha': .001, 'tol': 1e-2, 'max_iter': 100}
    parameters = {'n_var': 40, 'n_tasks': 10, 'density': 0.15,
                  'alpha': .01, 'tol': 1e-3, 'max_iter': 100}

    mem = joblib.Memory(".")

    _, _, gt = create_signals(parameters,
                              output_dir="_prof_group_sparse_covariance")
    signals = gt["signals"]

    emp_covs, n_samples = empirical_covariances(signals)
    print("alpha max: " + str(compute_alpha_max(emp_covs, n_samples)))

    # With diagonal elements initialization
    probe1 = ScoreProbe()
    est_precs1, probe1 = mem.cache(modified_gsc)(signals, parameters, probe1)
    probe1.comment = "diagonal"  # set after execution for joblib not to see it
    probe1.plot()

    # Ledoit-Wolf initialization: invert each task's Ledoit-Wolf covariance
    # estimate to get an alternative starting point for the precisions.
    ld = np.empty(emp_covs.shape)
    for k in range(emp_covs.shape[-1]):
        ld[..., k] = np.linalg.inv(ledoit_wolf(signals[k])[0])

    # Time the diagonal initialization again, for comparison with the
    # Ledoit-Wolf run below.
    probe1 = ScoreProbe()
    est_precs1, probe1 = utils.timeit(mem.cache(modified_gsc))(
        signals, parameters, probe=probe1)
    probe1.comment = "diagonal"  # set after the call so joblib does not hash it

    probe2 = ScoreProbe()
    parameters["precisions_init"] = ld
    est_precs2, probe2 = utils.timeit(mem.cache(modified_gsc))(
        signals, parameters, probe=probe2)
    probe2.comment = "ledoit-wolf"

    print("difference between final estimates (max norm) %.2e"
          % abs(est_precs1 - est_precs2).max())

    pl.figure()
    pl.semilogy(probe1.timings[1:], probe1.max_norm,
                "+-", label=probe1.comment)
    pl.semilogy(probe2.timings[1:], probe2.max_norm,
                "+-", label=probe2.comment)
    pl.xlabel("Time [s]")
    pl.ylabel("Max norm")
    pl.grid()
    pl.legend(loc="best")

    pl.figure()
    pl.plot(probe1.timings, probe1.objective,
            "+-", label=probe1.comment)
    pl.plot(probe2.timings, probe2.objective,
            "+-", label=probe2.comment)
    pl.xlabel("Time [s]")
    pl.ylabel("Objective")
    pl.grid()
    pl.legend(loc="best")

    pl.show()
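

# Hedged driver sketch: one way these benchmarks might be run as a script.
# Which functions to call, and in what order, is a choice made here for
# illustration; lasso_gsc_comparison() and singular_cov_case() additionally
# rely on a module-level `output_dir` being defined elsewhere.
if __name__ == "__main__":
    benchmark1()
    benchmark2()
    lasso_gsc_comparison()
    singular_cov_case()
    benchmark3()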