def test_gradient_beta(n_samples, log_domain, gradient):
    p = 2
    n, m = 10, 15
    eps = 1
    n_layers = 100
    alphas, beta, C, *_ = make_ot(n, m, p, n_samples=n_samples,
                                  random_state=0)

    snet = Sinkhorn(n_layers=n_layers, log_domain=log_domain,
                    gradient_computation=gradient)
    snet_star = Sinkhorn(n_layers=1000, log_domain=False,
                         gradient_computation='analytic')

    # Check that both networks converge to the same potentials
    (f, g), _ = snet.transform(alphas, beta, C, eps)
    (f_star, g_star), _ = snet_star.transform(alphas, beta, C, eps)
    err_norm = np.sqrt(np.linalg.norm((f - f_star).ravel()) ** 2
                       + np.linalg.norm((g - g_star).ravel()) ** 2)
    assert err_norm < 1e-6

    # Get the gradient with analytic formula and autodiff
    G = snet.gradient_beta(alphas, beta, C, eps)
    G_star = snet_star.gradient_beta(alphas, beta, C, eps)
    assert np.allclose(G, G_star)
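As an extra sanity check outside the parametrized test, the analytic gradient can be compared against a finite-difference approximation of the score. This is a minimal sketch, under the assumption that gradient_beta differentiates the value returned by score with respect to beta (both methods are used with the signatures seen in the tests above); the mean removal accounts for the gradient being defined only up to an additive constant.

import numpy as np

# Minimal finite-difference sanity check (a sketch, not part of the test
# suite). Assumes `gradient_beta` is the gradient of `score` w.r.t. beta.
alphas, beta, C, *_ = make_ot(10, 15, 2, n_samples=1, random_state=0)
eps, h = 1, 1e-6
snet = Sinkhorn(n_layers=500, gradient_computation='analytic')

G = np.ravel(snet.gradient_beta(alphas, beta, C, eps))
G_fd = np.zeros_like(beta)
for j in range(len(beta)):
    e_j = np.zeros_like(beta)
    e_j[j] = h
    G_fd[j] = (snet.score(alphas, beta + e_j, C, eps)
               - snet.score(alphas, beta - e_j, C, eps)) / (2 * h)

# The dual potential is only defined up to an additive constant, so
# compare the gradients after removing their means.
assert np.allclose(G - G.mean(), G_fd - G_fd.mean(), atol=1e-4)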
def get_optimal_transport_problem(n_alpha=100, n_beta=30, point_dim=2,
                                  n_samples=1, eps=1e-1, random_state=None):
    alphas, beta, C, *_ = make_ot(n_alpha=n_alpha, n_beta=n_beta,
                                  point_dim=point_dim, n_samples=n_samples,
                                  random_state=random_state)
    return alphas[:1], beta, C, eps
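For reference, a typical way to consume this helper, with the Sinkhorn constructor used as in the tests above:

# Build a single OT problem and solve it with a fixed number of layers.
alpha, beta, C, eps = get_optimal_transport_problem(random_state=0)
snet = Sinkhorn(n_layers=100, log_domain=False)
(f, g), _ = snet.transform(alpha, beta, C, eps)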
def run_benchmark(n_samples=10, n_alpha=100, eps=1, n_outer=300,
                  step_size=.1, max_layers=64, gpu=None):
    """Benchmark for the Wasserstein barycenter computation time.

    Parameters
    ----------
    n_samples : int (default: 10)
        Number of distributions to compute the barycenter from.
    n_alpha : int (default: 100)
        Number of points in the support of the distributions.
    eps : float (default: 1)
        Entropy regularization parameter for the Wasserstein distance.
    n_outer : int (default: 300)
        Maximal number of iterations run for the gradient descent algorithm.
    step_size : float (default: .1)
        Step size for the gradient descent.
    max_layers : int (default: 64)
        The benchmark is computed for a number of inner layers from 1 to
        max_layers in log-scale. max_layers is rounded down to the largest
        power of 2 below it.
    gpu : int or None (default: None)
        If not None, run the computation on GPU number `gpu`.
    """
    device = f'cuda:{gpu}' if gpu is not None else None
    meta = dict(n_samples=n_samples, n_alpha=n_alpha, n_beta=n_alpha,
                point_dim=2)
    alphas, _, C, *_ = make_ot(**meta)

    results = []
    max_layers = int(np.log2(max_layers))
    n_iters = np.unique(np.logspace(0, max_layers, num=max_layers + 1,
                                    base=2, dtype=int))
    for n_inner in n_iters:
        for gradient in ['autodiff', 'analytic']:
            print(f"Fitting {gradient}[{n_inner}]:", end='', flush=True)
            beta_star, res = wasserstein_barycenter(
                alphas, C, eps, n_outer=n_outer, n_inner=n_inner,
                step_size=step_size, gradient=gradient, device=device,
                meta=meta)
            results.extend(res)

    print("Fitting optimal barycenter:", end='', flush=True)
    beta_star, res = wasserstein_barycenter(
        alphas, C, eps, n_outer=2 * n_outer, n_inner=N_INNER_FULL,
        step_size=step_size, gradient='analytic', device=device, meta=meta)
    results.extend(res)

    df = pd.DataFrame(results)
    tag = f"{datetime.now().strftime('%Y-%m-%d_%Hh%M')}"
    df.to_pickle(os.path.join(OUTPUT_DIR, f"{BENCH_NAME}_{tag}.pkl"))
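A smaller invocation is convenient for checking the pipeline before a full run; the values below are illustrative only.

# Quick smoke run of the barycenter benchmark on CPU with reduced sizes.
run_benchmark(n_samples=4, n_alpha=50, n_outer=50, max_layers=16, gpu=None)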
def run_benchmark(n_rep=50, max_layers=100, n_probe_layers=20, gpu=None):
    """Benchmark for the gradient computation time (analytic vs autodiff).

    Parameters
    ----------
    n_rep : int (default: 50)
        Number of repetitions for the benchmark. For each repetition, a new
        problem is created and the gradients are computed for different
        numbers of layers.
    max_layers : int (default: 100)
        Maximal number of layers. The benchmark is run for different values
        of n_layers chosen in log-scale between 1 and max_layers.
    n_probe_layers : int (default: 20)
        Number of values of n_layers probed in the log-scale.
    gpu : int or None (default: None)
        If not None, use GPU number `gpu` to run the gradient computation.
    """
    eps = 1
    dimensions = dict(n_alpha=1000, n_beta=500, point_dim=2, n_samples=100)
    device = f'cuda:{gpu}' if gpu is not None else None

    # Probe layer counts in log-scale between 1 and max_layers. Note that
    # np.logspace works in base 10, so the exponent must be log10.
    layers = np.unique(np.logspace(0, np.log10(max_layers), n_probe_layers,
                                   dtype=int))
    n_probe_layers = len(layers)
    layers = np.minimum(max_layers, layers)

    results = []
    for j in range(n_rep):
        alpha, beta, C, *_ = make_ot(**dimensions, random_state=None)
        args = check_tensor(alpha, beta, C, device=device)
        for i, nl in enumerate(layers):
            progress = (j * n_probe_layers + i) / (n_rep * n_probe_layers)
            print(f"\rBenchmark gradient computation on {device}: "
                  f"{progress:.1%}", end='', flush=True)
            for gradient in ['analytic', 'autodiff', 'implicit']:
                model = Sinkhorn(n_layers=nl, gradient_computation=gradient,
                                 device=device, log_domain=False)
                t_start = time()
                model.gradient_beta(*args, eps=eps)
                delta_t = time() - t_start
                results.append(dict(gradient=gradient, n_layers=nl,
                                    time=delta_t, **dimensions))

    df = pd.DataFrame(results)
    tag = f"{datetime.now().strftime('%Y-%m-%d_%Hh%M')}"
    df.to_pickle(os.path.join(OUTPUT_DIR, f"{BENCH_NAME}_{tag}.pkl"))
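Once a result file is written, the timings can be summarized per gradient method. The file name below is a placeholder for one actually produced by the benchmark.

import pandas as pd

# Hypothetical post-processing: average timings over repetitions for each
# (gradient, n_layers) pair. Replace the path with a real output file.
df = pd.read_pickle("gradient_benchmark_2024-01-01_00h00.pkl")
summary = (df.groupby(['gradient', 'n_layers'])['time']
             .mean().unstack('gradient'))
print(summary)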
def test_sinkhorn_hessian():
    p = 2
    n, m = 10, 15
    eps = .1
    n_layers = 2000
    alpha, beta, C, *_ = make_ot(n, m, p, n_samples=1, random_state=0)
    K = np.exp(-C / eps)
    u, v = sinkhorn(alpha, beta, K, n_layers)
    z = np.concatenate([u, v])

    # Hessian of the dual loss from the analytic formula
    H = dzz(K, alpha, beta, z, eps)

    # Hessian computed with autograd, evaluated at the dual potentials
    # (f, g) = eps * log(u, v)
    hess = autograd.hessian(dual_loss, argnum=0)
    H1 = hess(eps * np.log(z), alpha, beta, K, eps)
    assert np.allclose(H, H1)
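For context, if dual_loss is the standard entropic OT dual in the potentials (f, g), its Hessian has a closed form in terms of the transport plan P = diag(u) K diag(v). The sketch below shows what dzz presumably computes, up to the sign convention of dual_loss; this is an assumption based on the usual formulation, not the repository's code.

import numpy as np

def dual_hessian_sketch(K, u, v, eps):
    # Hessian of D(f, g) = <f, alpha> + <g, beta>
    #                      - eps * sum_ij exp((f_i + g_j - C_ij) / eps)
    # at (f, g) = eps * log(u, v), with P = diag(u) K diag(v):
    #     H = -1/eps * [[diag(P 1_m), P], [P.T, diag(P.T 1_n)]]
    P = u[:, None] * K * v[None, :]
    return -np.block([
        [np.diag(P.sum(axis=1)), P],
        [P.T, np.diag(P.sum(axis=0))],
    ]) / eps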
def test_sinkhorn_np(n_samples, log_domain):
    p = 2
    n, m = 10, 15
    eps = .1
    n_layers = 500
    alphas, beta, C, *_ = make_ot(n, m, p, n_samples=n_samples,
                                  random_state=0)
    K = np.exp(-C / eps)

    snet = Sinkhorn(n_layers=n_layers, log_domain=log_domain)
    (f, g), _ = snet.transform(alphas, beta, C, eps)

    for i in range(n_samples):
        u, v = sinkhorn(alphas[i], beta, K, n_layers)
        assert np.allclose(f[i], eps * np.log(u))
        assert np.allclose(g[i], eps * np.log(v))
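The numpy reference sinkhorn used here is not shown in this section; a minimal implementation consistent with the call signature above would be the classical matrix-scaling iterations (a sketch, assuming uniform initialization of v):

import numpy as np

def sinkhorn_reference(alpha, beta, K, n_layers):
    # Classical Sinkhorn matrix scaling with K = exp(-C / eps): alternate
    # the updates u = alpha / (K v) and v = beta / (K^T u).
    v = np.ones(K.shape[1])
    for _ in range(n_layers):
        u = alpha / (K @ v)
        v = beta / (K.T @ u)
    return u, v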
def test_gradient(n_iter, grad, f_grad):
    p = 2
    n, m = 10, 15
    eps = 1
    n_samples = 2
    alphas, beta, C, *_ = make_ot(n, m, p, n_samples=n_samples,
                                  random_state=0)
    K = np.exp(-C / eps)

    # Compute gradient with default parameters
    sinkhorn = Sinkhorn(n_layers=n_iter, gradient_computation=grad)
    for i in range(n_samples):
        g = sinkhorn.gradient_beta(alphas[i:i + 1], beta, C, eps)
        g_np = f_grad(alphas[i], beta, K, eps, n_iter)[n:]
        assert np.allclose(g_np, g), np.linalg.norm(g - g_np)
def test_log_domain(eps, n_layers):
    """Test that the log domain computation is equivalent to classical
    sinkhorn.
    """
    p = 2
    n, m = 10, 15
    alpha, beta, C, *_ = make_ot(n, m, p, random_state=0)
    alpha = np.r_['0,2', alpha, alpha]

    snet1 = Sinkhorn(n_layers, log_domain=True)
    (f1, g1), _ = snet1.transform(alpha, beta, C, eps)

    snet2 = Sinkhorn(n_layers, log_domain=False)
    (f2, g2), _ = snet2.transform(alpha, beta, C, eps)

    assert np.allclose(f1, f2)
    assert np.allclose(g1, g2)

    # Check that the scores are well computed
    assert np.isclose(snet1.score(alpha, beta, C, eps),
                      snet2.score(alpha, beta, C, eps))
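What log_domain=True presumably changes, based on the standard formulation: the iterations run on the potentials (f, g) = eps * log(u, v) with a logsumexp instead of the scalings (u, v), which avoids under/overflow in exp(-C / eps) for small eps. A self-contained sketch of those updates:

import numpy as np
from scipy.special import logsumexp

def sinkhorn_log_domain(alpha, beta, C, eps, n_layers):
    # Log-domain Sinkhorn: the scaling updates u = alpha / (K v) and
    # v = beta / (K^T u) become stabilized logsumexp updates on (f, g).
    f, g = np.zeros(C.shape[0]), np.zeros(C.shape[1])
    for _ in range(n_layers):
        f = eps * (np.log(alpha)
                   - logsumexp((g[None, :] - C) / eps, axis=1))
        g = eps * (np.log(beta)
                   - logsumexp((f[:, None] - C) / eps, axis=0))
    return f, g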
        random_state=8),
        'to_plot': ['z', 'g1', 'g2'],
    },
    'sinkhorn': {
        'name': 'Wasserstein Distance',
        'model': Sinkhorn,
        'max_layer': 60,
        'model_args': dict(log_domain=False),
        'pb_func': get_optimal_transport_problem,
        'pb_args': dict(n_alpha=100, n_beta=30, point_dim=2, n_samples=2,
                        random_state=53),
        'to_plot': ['g1', 'g2', 'g3', 'z'],
    },
}

if args.plot:
    plot_benchmark(config, file_name=args.file)
else:
    run_benchmark(config)