def update_plot(val=None):
    """Slider/button callback: refit the density estimate at the current
    slider values and redraw the main axes.

    Reads ``s_sigma``/``s_lmbda`` (log2-scaled sliders) and the module-level
    data ``Z``, grid ``Xs``/``Ys``, flag ``plot_pdf``, and figure handles
    ``fig``/``ax``.
    """
    sigma = 2 ** s_sigma.val
    lmbda = 2 ** s_lmbda.val

    # fit coefficients and evaluate both the training objective and a
    # 5-fold cross-validated one
    K = gaussian_kernel(Z, sigma=sigma)
    b = _compute_b_sym(Z, K, sigma)
    C = _compute_C_sym(Z, K, sigma)
    a = score_matching_sym(Z, sigma, lmbda, K, b, C)
    J = _objective_sym(Z, sigma, lmbda, a, K, b, C)
    J_xval = np.mean(xvalidate(Z, 5, sigma, lmbda, K, num_repetitions=3))
    print(a[:5])

    # closures over the fitted (a, sigma) for grid evaluation
    kernel = lambda X, Y=None: gaussian_kernel(X, Y, sigma=sigma)
    kernel_grad = lambda x, X=None: gaussian_kernel_grad(x, X, sigma)
    logq_est = lambda x: log_pdf_estimate(x, a, Z, kernel)
    dlogq_est = lambda x: log_pdf_estimate_grad(x, a, Z, kernel_grad)

    description = "N=%d, sigma: %.2f, lambda: %.2f, J(a)=%.2f, XJ(a)=%.2f" % \
        (N, sigma, lmbda, J, J_xval)

    if plot_pdf:
        D = evaluate_density_grid(Xs, Ys, logq_est)
        description = "log-pdf: " + description
    else:
        D = evaluate_density_grad_grid(Xs, Ys, dlogq_est)
        description = "norm-grad-log-pdf: " + description

    ax.clear()
    ax.plot(Z[:, 0], Z[:, 1], 'bx')
    plot_array(Xs, Ys, D, ax, plot_contour=True)
    ax.set_title(description)
    fig.canvas.draw_idle()
def _objective(X, Y, sigma, lmbda, alpha, K=None, K_XY=None, b=None, C=None):
    """Evaluate the (asymmetric) score-matching objective J(alpha).

    Any of ``K`` (symmetric kernel on X, only needed when lmbda > 0),
    ``K_XY``, ``b`` and ``C`` may be supplied precomputed; the rest are
    derived from X, Y and sigma.
    """
    if K_XY is None:
        K_XY = gaussian_kernel(X, Y, sigma=sigma)

    # the regulariser needs the kernel matrix on X alone; reuse K_XY when
    # X and Y are literally the same array
    if K is None and lmbda > 0:
        K = K_XY if X is Y else gaussian_kernel(X, sigma=sigma)

    if b is None:
        b = _compute_b(X, Y, K_XY, sigma)
    if C is None:
        C = _compute_C(X, Y, K_XY, sigma)

    NX = len(X)
    linear_term = 2. / (NX * sigma) * alpha.dot(b)
    if lmbda > 0:
        quad_matrix = C + (K + np.eye(len(C))) * lmbda
    else:
        quad_matrix = C
    quadratic_term = 2. / (NX * sigma ** 2) * alpha.dot(quad_matrix.dot(alpha))
    return linear_term + quadratic_term
def test_objective_matches_sym_precomputed_KbC():
    """Symmetric and asymmetric objectives agree when K, b, C are precomputed.

    Fix: the original computed ``gaussian_kernel(Z, sigma=sigma)`` twice;
    the second, redundant computation is removed.
    """
    sigma = 1.
    lmbda = 1.
    Z = np.random.randn(100, 2)

    K = gaussian_kernel(Z, sigma=sigma)
    alpha = np.random.randn(len(Z))
    C = _compute_C_sym(Z, K, sigma)
    b = _compute_b_sym(Z, K, sigma)

    J_sym = _objective_sym(Z, sigma, lmbda, alpha, K, b, C)
    J = _objective(Z, Z, sigma, lmbda, alpha, K_XY=K, b=b, C=C)
    assert_equal(J, J_sym)
def plot_lmbda_surface(val):
    """Button callback: scan lambda over the slider range at the current
    sigma, plot the (log-compressed) J surface, and snap the lambda slider
    to the minimiser."""
    print("lambda")
    log2_sigma = s_sigma.val
    sigma = 2 ** log2_sigma
    K = gaussian_kernel(Z, sigma=sigma)

    log2_lambdas = np.linspace(s_lmbda.valmin, s_lmbda.valmax)
    Js = np.zeros(len(log2_lambdas))
    for i, log2_lmbda in enumerate(log2_lambdas):
        Js[i] = np.mean(
            xvalidate(Z, 5, sigma, 2 ** log2_lmbda, K, num_repetitions=3))

    log2_lambda_min = log2_lambdas[Js.argmin()]
    # shift so the log argument stays positive, then compress for plotting
    log_Js = np.log(Js - (Js.min() if Js.min() < 0 else 0) + 1)

    # update slider
    s_lmbda.set_val(log2_lambda_min)
    update_plot()

    plt.figure()
    plt.plot(log2_lambdas, log_Js)
    plt.plot([log2_lambda_min, log2_lambda_min],
             [log_Js.min(), log_Js.max()], 'r')
    plt.title(
        r"$\lambda$ surface for $\log_2 \sigma=%.2f$, best value of $J(\alpha)=%.2f$ at $\log_2 \lambda=%.2f$" %
        (log2_sigma, Js.min(), log2_lambda_min))
    plt.show()
def test_objective_sym_against_naive():
    """Symmetric objective matches a brute-force triple-loop implementation
    (unregularised case, lambda = 0)."""
    sigma = 1.
    D = 2
    N = 10
    Z = np.random.randn(N, D)
    K = gaussian_kernel(Z, sigma=sigma)

    num_trials = 10
    for _ in range(num_trials):
        alpha = np.random.randn(N)

        # linear term: sum over dimensions and all point pairs
        lin_term = 0
        for d in range(D):
            for i in range(N):
                for j in range(N):
                    lin_term += alpha[i] * K[i, j] * \
                        (-1 + 2. / sigma * ((Z[i][d] - Z[j][d]) ** 2))
        lin_term *= (2. / (N * sigma))

        # quadratic term: squared weighted differences per dimension
        quad_term = 0
        for d in range(D):
            for i in range(N):
                inner = 0
                for j in range(N):
                    inner += alpha[j] * (Z[j, d] - Z[i, d]) * K[i, j]
                quad_term += (inner ** 2)
        quad_term *= (2. / (N * (sigma ** 2)))

        J_naive = lin_term + quad_term

        # compare to unregularised objective
        lmbda = 0.
        J = _objective_sym(Z, sigma, lmbda, alpha, K)
        assert_close(J_naive, J)
def optimise_sigma_surface(val):
    """Button callback: scan sigma over the slider range at the current
    lambda, plot the (log-compressed) J surface, and snap the sigma slider
    to the minimiser."""
    print("sigma")
    log2_lmbda = s_lmbda.val
    lmbda = 2 ** log2_lmbda

    log2_sigmas = np.linspace(s_sigma.valmin, s_sigma.valmax)
    Js = np.zeros(len(log2_sigmas))
    for i, log2_sigma in enumerate(log2_sigmas):
        sigma = 2 ** log2_sigma
        # kernel depends on sigma, so it must be rebuilt per trial
        K = gaussian_kernel(Z, sigma=sigma)
        Js[i] = np.mean(xvalidate(Z, 5, sigma, lmbda, K, num_repetitions=3))

    log2_sigma_min = log2_sigmas[Js.argmin()]
    # shift so the log argument stays positive, then compress for plotting
    log_Js = np.log(Js - (Js.min() if Js.min() < 0 else 0) + 1)

    # update slider
    s_sigma.set_val(log2_sigma_min)
    update_plot()

    plt.figure()
    plt.plot(log2_sigmas, log_Js)
    plt.plot([log2_sigma_min, log2_sigma_min],
             [log_Js.min(), log_Js.max()], 'r')
    plt.title(
        r"$\sigma$ surface for $\log_2 \lambda=%.2f$, best value of $J(\alpha)=%.2f$ at $\log_2 \sigma=%.2f$" %
        (log2_lmbda, Js.min(), log2_sigma_min))
    plt.show()
def test_incomplete_cholesky_2():
    """Incomplete Cholesky on a tiny grid: pivots, shapes and known entries."""
    X = np.arange(9.0).reshape(3, 3)
    kernel = lambda X, Y=None: gaussian_kernel(X, Y, sigma=8.)
    temp = incomplete_cholesky(X, kernel, eta=0.999)
    R, K_chol, I, W = temp["R"], temp["K_chol"], temp["I"], temp["W"]
    K = kernel(X)

    # pivot set: two points chosen, first and last
    assert_equal(len(I), 2)
    assert_equal(I[0], 0)
    assert_equal(I[1], 2)

    assert_equal(K_chol.shape, (len(I), len(I)))
    for i in range(len(I)):
        assert_equal(K_chol[i, i], K[I[i], I[i]])

    # reference values obtained from a trusted run
    assert_equal(R.shape, (len(I), len(X)))
    expected_R = [
        (0, 0, 1.000000000000000),
        (0, 1, 0.034218118311666),
        (0, 2, 0.000001370959086),
        (1, 0, 0),
        (1, 1, 0.034218071400058),
        (1, 2, 0.999999999999060),
    ]
    for i, j, value in expected_R:
        assert_almost_equal(R[i, j], value)

    assert_equal(W.shape, (len(I), len(X)))
    expected_W = [
        (0, 0, 1.000000000000000),
        (0, 1, 0.034218071400090),
        (0, 2, 0),
        (1, 0, 0),
        (1, 1, 0.034218071400090),
        (1, 2, 1),
    ]
    for i, j, value in expected_W:
        assert_almost_equal(W[i, j], value)
def test_incomplete_cholesky_check_given_rank():
    """An integer eta is interpreted as a fixed target rank."""
    kernel = lambda X, Y=None: gaussian_kernel(X, Y, sigma=20.)
    X = np.random.randn(300, 10)
    rank = 5
    K_chol = incomplete_cholesky(X, kernel, eta=rank)["K_chol"]
    assert_equal(K_chol.shape[0], rank)
def test_incomplete_cholesky_1():
    """Incomplete Cholesky with power=2: pivots, shapes and known entries."""
    X = np.arange(9.0).reshape(3, 3)
    kernel = lambda X, Y=None: gaussian_kernel(X, Y, sigma=200.)
    temp = incomplete_cholesky(X, kernel, eta=0.8, power=2)
    R, K_chol, I, W = temp["R"], temp["K_chol"], temp["I"], temp["W"]
    K = kernel(X)

    # pivot set: two points chosen, first and last
    assert_equal(len(I), 2)
    assert_equal(I[0], 0)
    assert_equal(I[1], 2)

    assert_equal(K_chol.shape, (len(I), len(I)))
    for i in range(len(I)):
        assert_equal(K_chol[i, i], K[I[i], I[i]])

    # reference values obtained from a trusted run
    assert_equal(R.shape, (len(I), len(X)))
    expected_R = [
        (0, 0, 1.000000000000000),
        (0, 1, 0.763379494336853),
        (0, 2, 0.339595525644939),
        (1, 0, 0),
        (1, 1, 0.535992421608228),
        (1, 2, 0.940571570355992),
    ]
    for i, j, value in expected_R:
        assert_almost_equal(R[i, j], value)

    assert_equal(W.shape, (len(I), len(X)))
    expected_W = [
        (0, 0, 1.000000000000000),
        (0, 1, 0.569858199525808),
        (0, 2, 0),
        (1, 0, 0),
        (1, 1, 0.569858199525808),
        (1, 2, 1),
    ]
    for i, j, value in expected_W:
        assert_almost_equal(W[i, j], value)
def test_compute_C_run_asym():
    """Smoke test: _compute_C runs on distinct X and Y without raising."""
    sigma = 1.
    X = np.random.randn(100, 2)
    Y = np.random.randn(100, 2)
    K_XY = gaussian_kernel(X, Y, sigma=sigma)
    _ = _compute_C(X, Y, K_XY, sigma=sigma)
def select_sigma_lambda_cma(Z, num_folds=5, num_repetitions=1, sigma0=1.1,
                            lmbda0=1.1, cma_opts={}, disp=False):
    """Jointly select (sigma, lambda) by CMA-ES on the cross-validated
    objective, searching in log2 space.

    Fix: ``num_repetitions`` was accepted but never forwarded to
    ``xvalidate``; it is now passed through.

    NOTE(review): ``cma_opts={}`` is a mutable default — kept for interface
    compatibility; ``cma`` is assumed not to mutate it.

    Returns the final ``cma.CMAEvolutionStrategy`` instance.
    """
    import cma

    # CMA-ES works on the log2-scaled parameters
    start = np.log2(np.array([sigma0, lmbda0]))
    es = cma.CMAEvolutionStrategy(start, 1., cma_opts)
    while not es.stop():
        if disp:
            es.disp()
        solutions = es.ask()
        values = np.zeros(len(solutions))
        for i, (log2_sigma, log2_lmbda) in enumerate(solutions):
            sigma = 2 ** log2_sigma
            lmbda = 2 ** log2_lmbda
            K = gaussian_kernel(Z, sigma=sigma)
            folds = xvalidate(Z, num_folds, sigma, lmbda, K,
                              num_repetitions=num_repetitions)
            values[i] = np.mean(folds)
            logger.info("particle %d/%d, sigma: %.2f, lambda: %.2f, J=%.4f" % \
                (i + 1, len(solutions), sigma, lmbda, values[i]))
        es.tell(solutions, values)
    return es
def select_sigma_grid(Z, num_folds=5, num_repetitions=1, log2_sigma_min=-3,
                      log2_sigma_max=10, resolution_sigma=25, lmbda=1.,
                      plot_surface=False):
    """Select sigma by grid search on the cross-validated objective at a
    fixed lambda; returns the best sigma.

    Fix: ``num_repetitions`` was accepted but never forwarded to
    ``xvalidate``; it is now passed through.
    """
    sigmas = 2 ** np.linspace(log2_sigma_min, log2_sigma_max, resolution_sigma)

    Js = np.zeros(len(sigmas))
    for i, sigma in enumerate(sigmas):
        K = gaussian_kernel(Z, sigma=sigma)
        folds = xvalidate(Z, num_folds, sigma, lmbda, K,
                          num_repetitions=num_repetitions)
        Js[i] = np.mean(folds)
        logger.info("sigma trial %d/%d, sigma: %.2f, lambda: %.2f, J=%.2f" % \
            (i + 1, len(sigmas), sigma, lmbda, Js[i]))

    if plot_surface:
        plt.figure()
        plt.plot(np.log2(sigmas), Js)

    best_sigma_idx = Js.argmin()
    best_sigma = sigmas[best_sigma_idx]
    logger.info("Best sigma: %.2f with J=%.2f" % (best_sigma, Js[best_sigma_idx]))
    return best_sigma
def test_compute_C_matches_sym():
    """Asymmetric _compute_C with X=Y reproduces the symmetric version."""
    sigma = 1.
    Z = np.random.randn(10, 2)
    K = gaussian_kernel(Z, sigma=sigma)

    C_sym = _compute_C_sym(Z, K, sigma=sigma)
    C_asym = _compute_C(Z, Z, K, sigma=sigma)
    assert_allclose(C_sym, C_asym)
def test_compute_b_matches_sym():
    """Asymmetric _compute_b with X=Y reproduces the symmetric version."""
    sigma = 1.
    Z = np.random.randn(10, 2)
    K = gaussian_kernel(Z, sigma=sigma)

    b_sym = _compute_b_sym(Z, K, sigma=sigma)
    b_asym = _compute_b(Z, Z, K, sigma=sigma)
    assert_allclose(b_sym, b_asym)
def test_score_matching_objective_matches_sym():
    """Asymmetric score matching with X=Y reproduces the symmetric fit."""
    sigma = 1.
    lmbda = 1.
    Z = np.random.randn(100, 2)
    K = gaussian_kernel(Z, sigma=sigma)

    # both calls return the fitted coefficient vector
    a_sym = score_matching_sym(Z, sigma, lmbda, K)
    a_asym = score_matching(Z, Z, sigma, lmbda, K)
    assert_allclose(a_asym, a_sym)
def test_compute_b_sym_low_rank_matches_full():
    """Low-rank b approximates the full b within a loose tolerance."""
    sigma = 1.
    Z = np.random.randn(100, 2)
    low_rank_dim = int(len(Z) * .9)

    K = gaussian_kernel(Z, sigma=sigma)
    R = incomplete_cholesky_gaussian(Z, sigma, eta=low_rank_dim)["R"]

    b_full = _compute_b_sym(Z, K, sigma)
    b_low_rank = _compute_b_low_rank_sym(Z, R.T, sigma)
    assert_allclose(b_full, b_low_rank, atol=5e-1)
def test_incomplete_cholesky_new_point():
    """Re-embedding each training point one at a time reproduces R's columns."""
    kernel = lambda X, Y=None: gaussian_kernel(X, Y, sigma=200.)
    X = np.random.randn(1000, 10)
    low_rank_dim = 15
    temp = incomplete_cholesky(X, kernel, eta=low_rank_dim)
    R, I, nu = temp["R"], temp["I"], temp["nu"]

    # construct train-train kernel matrix approximation using one by one calls
    for i in range(low_rank_dim):
        col = incomplete_cholesky_new_point(X, X[i], kernel, I, R, nu)
        assert_allclose(col, R[:, i])
def score_matching(X, Y, sigma, lmbda, K=None):
    """Fit score-matching coefficients for the asymmetric estimator.

    Solves the regularised linear system
    (C + lmbda * (K + I)) a = -sigma/2 * b and returns a.
    K may be passed precomputed; otherwise it is built from X, Y, sigma.
    """
    # compute kernel matrix if needed
    if K is None:
        K = gaussian_kernel(X, Y, sigma=sigma)

    b = _compute_b(X, Y, K, sigma)
    C = _compute_C(X, Y, K, sigma)

    # solve regularised linear system
    system_matrix = C + (K + np.eye(len(C))) * lmbda
    return -sigma / 2. * np.linalg.solve(system_matrix, b)
def test_apply_C_left_sym_low_rank_matches_full():
    """Low-rank left-multiplication by (C + lmbda*(K+I)) approximates the
    dense product within a loose tolerance."""
    sigma = 1.
    N = 10
    Z = np.random.randn(N, 2)
    K = gaussian_kernel(Z, sigma=sigma)
    R = incomplete_cholesky_gaussian(Z, sigma, eta=0.1)["R"]

    v = np.random.randn(Z.shape[0])
    lmbda = 1.

    dense = (_compute_C_sym(Z, K, sigma) + lmbda * (K + np.eye(len(K)))).dot(v)
    low_rank = _apply_left_C_sym_low_rank(v, Z, R.T, lmbda)
    assert_allclose(dense, low_rank, atol=1e-1)
def test_objective_sym_same_as_from_estimation():
    """Objective value is the same whether b and C are precomputed or
    recomputed internally."""
    sigma = 1.
    lmbda = 1.
    Z = np.random.randn(100, 2)
    K = gaussian_kernel(Z, sigma=sigma)
    a = score_matching_sym(Z, sigma, lmbda, K)

    C = _compute_C_sym(Z, K, sigma)
    b = _compute_b_sym(Z, K, sigma)
    J_precomputed = _objective_sym(Z, sigma, lmbda, a, K, b, C)
    J_internal = _objective_sym(Z, sigma, lmbda, a, K)
    assert_almost_equal(J_precomputed, J_internal)
def test_objective_sym_optimum():
    """The fitted coefficients minimise the objective: random coefficient
    vectors never do better."""
    sigma = 1.
    lmbda = 1.
    Z = np.random.randn(100, 2)
    K = gaussian_kernel(Z, sigma=sigma)

    a = score_matching_sym(Z, sigma, lmbda, K)
    J_opt = _objective_sym(Z, sigma, lmbda, a, K)

    for _ in range(10):
        a_random = np.random.randn(len(Z))
        assert _objective_sym(Z, sigma, lmbda, a_random, K) >= J_opt
def test_compute_b_sym_against_paper():
    """Single-point, 1D case of b against the paper's closed form."""
    sigma = 1.
    D = 1
    Z = np.random.randn(1, D)
    K = gaussian_kernel(Z, sigma=sigma)
    b = _compute_b_sym(Z, K, sigma)

    # compute by hand, well, it's just -k since rest is zero (look at it)
    x = Z[0]
    k = K[0, 0]
    b_paper = 2. / sigma * (k * (x ** 2) + (x ** 2) * k - 2 * x * k * x) - k
    assert_equal(b, b_paper)
def test_incomplete_cholesky_3():
    """Low-rank factors reconstruct the full kernel matrix closely."""
    kernel = lambda X, Y=None: gaussian_kernel(X, Y, sigma=200.)
    X = np.random.randn(3000, 10)
    temp = incomplete_cholesky(X, kernel, eta=0.001)
    R, K_chol, I, W = temp["R"], temp["K_chol"], temp["I"], temp["W"]
    K = kernel(X)

    assert_equal(K_chol.shape, (len(I), len(I)))
    assert_equal(R.shape, (len(I), len(X)))
    assert_equal(W.shape, (len(I), len(X)))

    # both reconstructions should be close to K in Frobenius norm
    assert_less_equal(np.linalg.norm(K - R.T.dot(R)), .5)
    assert_less_equal(np.linalg.norm(K - W.T.dot(K_chol.dot(W))), .5)
def test_incomplete_cholesky_asymmetric():
    """Embedding a disjoint test set approximates the cross kernel matrix."""
    kernel = lambda X, Y=None: gaussian_kernel(X, Y, sigma=1.)
    X = np.random.randn(1000, 10)
    Y = np.random.randn(100, 10)

    low_rank_dim = int(len(X) * 0.8)
    temp = incomplete_cholesky(X, kernel, eta=low_rank_dim)
    R, I, nu = temp["R"], temp["I"], temp["nu"]

    # construct train-train kernel matrix approximation using one by one calls
    R_test = incomplete_cholesky_new_points(X, Y, kernel, I, R, nu)
    assert_allclose(kernel(X, Y), R.T.dot(R_test), atol=10e-1)
def test_compute_C_sym_against_paper():
    """Single-point, 1D case of C against the paper's closed form."""
    sigma = 1.
    D = 1
    Z = np.random.randn(1, D)
    K = gaussian_kernel(Z, sigma=sigma)
    C = _compute_C_sym(Z, K, sigma)

    # compute by hand, well, it's just zero (look at it)
    x = Z[0]
    k = K[0, 0]
    C_paper = (x * k - k * x) * (k * x - x * k)
    assert_equal(C, C_paper)
def test_objective_sym_low_rank_matches_full():
    """Low-rank fit and objective land near the full-rank optimum."""
    sigma = 1.
    lmbda = 1.
    Z = np.random.randn(100, 2)

    # full-rank reference
    K = gaussian_kernel(Z, sigma=sigma)
    a_opt = score_matching_sym(Z, sigma, lmbda, K)
    J_opt = _objective_sym(Z, sigma, lmbda, a_opt, K)

    # low-rank counterpart via incomplete Cholesky
    L = incomplete_cholesky_gaussian(Z, sigma, eta=0.01)["R"].T
    a_opt_chol = score_matching_sym_low_rank(Z, sigma, lmbda, L)
    J_opt_chol = _objective_sym_low_rank(Z, sigma, lmbda, a_opt_chol, L)

    assert_almost_equal(J_opt, J_opt_chol, delta=2.)
def test_incomplete_cholesky_new_points_euqals_new_point():
    """Batch and one-at-a-time new-point embeddings agree column-wise.

    NOTE(review): "euqals" typo is in the public test name; left unchanged
    to preserve the interface.
    """
    kernel = lambda X, Y=None: gaussian_kernel(X, Y, sigma=200.)
    X = np.random.randn(1000, 10)
    low_rank_dim = 15
    temp = incomplete_cholesky(X, kernel, eta=low_rank_dim)
    R, I, nu = temp["R"], temp["I"], temp["nu"]

    R_test_full = incomplete_cholesky_new_points(X, X, kernel, I, R, nu)

    # construct train-train kernel matrix approximation using one by one calls
    R_test = np.zeros(R.shape)
    for i in range(low_rank_dim):
        R_test[:, i] = incomplete_cholesky_new_point(X, X[i], kernel, I, R, nu)
        assert_allclose(R_test[:, i], R_test_full[:, i])
def test_compute_b_low_rank_matches_full():
    """Asymmetric low-rank b approximates the full b within tolerance."""
    sigma = 1.
    X = np.random.randn(100, 2)
    Y = np.random.randn(50, 2)
    low_rank_dim = int(len(X) * 0.9)

    kernel = lambda X, Y: gaussian_kernel(X, Y, sigma=sigma)
    K_XY = kernel(X, Y)

    temp = incomplete_cholesky(X, kernel, eta=low_rank_dim)
    I, R, nu = temp["I"], temp["R"], temp["nu"]
    R_test = incomplete_cholesky_new_points(X, Y, kernel, I, R, nu)

    b_full = _compute_b(X, Y, K_XY, sigma)
    b_low_rank = _compute_b_low_rank(X, Y, R.T, R_test.T, sigma)
    assert_allclose(b_full, b_low_rank, atol=5e-1)
def score_matching_sym(Z, sigma, lmbda, K=None, b=None, C=None):
    """Fit score-matching coefficients for the symmetric estimator.

    Solves the regularised linear system
    (C + lmbda * (K + I)) a = -sigma/2 * b and returns a.
    K, b and C may be passed precomputed; missing ones are derived from Z.
    """
    # compute quantities
    if K is None:
        K = gaussian_kernel(Z, sigma=sigma)
    if b is None:
        b = _compute_b_sym(Z, K, sigma)
    if C is None:
        C = _compute_C_sym(Z, K, sigma)

    # solve regularised linear system
    system_matrix = C + (K + np.eye(len(C))) * lmbda
    return -sigma / 2. * np.linalg.solve(system_matrix, b)
def fun(sigma_lmbda, num_repetitions=1):
    """Cross-validated objective at log2-scaled (sigma, lambda), averaged
    over ``num_repetitions`` runs.

    Reads module-level ``Z`` and ``num_folds``.

    Fix: the diagnostic print labelled the lambda value ``log_lmbda`` while
    sigma was labelled ``log2_sigma``; both values are log2, so the label
    now says ``log2_lmbda``.

    Returns (mean J, std of J across repetitions).
    """
    log2_sigma = sigma_lmbda[0]
    log2_lmbda = sigma_lmbda[1]
    sigma = 2 ** log2_sigma
    lmbda = 2 ** log2_lmbda

    K = gaussian_kernel(Z, sigma=sigma)
    folds = [
        xvalidate(Z, num_folds, sigma, lmbda, K)
        for _ in range(num_repetitions)
    ]
    J = np.mean(folds)
    J_std = np.std(folds)
    print("fun: log2_sigma=%.2f, log2_lmbda=%.2f, J(a)=%.2f"
          % (log2_sigma, log2_lmbda, J))
    return J, J_std