import numpy as np
import pytest
# Assumed import path for the GPR under test (a GraphDot-style regressor with
# predict_loocv, fit(tol=...), and '+'/'*' regularization modes).
from graphdot.model.gaussian_process import GaussianProcessRegressor


def test_gpr_regularization():
    '''Multiplicative ('*') and additive ('+') regularization should yield
    consistent predictions when the training data contain duplicate inputs.'''

    class Kernel:
        def __init__(self, v, L):
            self.v = v
            self.L = L

        def __call__(self, X, Y=None, eval_gradient=False):
            v = self.v
            L = self.L
            d = np.subtract.outer(X, Y if Y is not None else X)
            f = v**2 * np.exp(-0.5 * d**2 / L**2)
            if eval_gradient is False:
                return f
            else:
                j1 = v**2 * np.exp(-0.5 * d**2 / L**2) * d**2 * L**-3
                j2 = 2 * v * f
                return f, np.stack((j1, j2), axis=2)

        def diag(self, X):
            return np.ones_like(X)

        @property
        def theta(self):
            return np.log([self.v, self.L])

        @theta.setter
        def theta(self, t):
            self.v, self.L = np.exp(t[:2])

        @property
        def bounds(self):
            return np.log([[0.001, 10.0], [0.001, 10.0]])

        def clone_with_theta(self, theta):
            k = Kernel(1.0, 1.0)
            k.theta = theta
            return k

    X = np.array([0, 1, 1, 2])
    y = np.array([1, 0, 1, 0])
    gpr1 = GaussianProcessRegressor(
        kernel=Kernel(100.0, 1.0), alpha=1e-6, optimizer=False,
        regularization='*'
    )
    gpr2 = GaussianProcessRegressor(
        kernel=Kernel(100.0, 1.0), alpha=1e-4, optimizer=False,
        regularization='+'
    )
    gpr1.fit(X, y, tol=1e-5)
    gpr2.fit(X, y, tol=1e-5)
    grid = np.linspace(0, 2, 9)
    assert np.allclose(
        gpr1.predict(grid), gpr2.predict(grid), rtol=1e-5, atol=1e-5
    )


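# Why the two modes can agree to such a tight tolerance: both only perturb the
# diagonal of the Gram matrix, and for this well-scaled kernel either
# perturbation is tiny relative to K's diagonal.  A minimal standalone sketch;
# the exact semantics assumed here ('+' adds alpha * I, '*' scales each
# diagonal entry by 1 + alpha) are illustrative assumptions, not taken from
# the library, and `_regularization_sketch` is a hypothetical helper.
def _regularization_sketch():
    X = np.array([0.0, 1.0, 1.0, 2.0])        # duplicate input at x = 1
    y = np.array([1.0, 0.0, 1.0, 0.0])
    K = 100.0**2 * np.exp(-0.5 * np.subtract.outer(X, X)**2)
    K_add = K + 1e-4 * np.eye(len(X))          # '+' : K + alpha * I
    K_mul = K + 1e-6 * np.diag(np.diag(K))     # '*' : K_ii * (1 + alpha)
    fit_add = K @ np.linalg.solve(K_add, y)    # in-sample predictions
    fit_mul = K @ np.linalg.solve(K_mul, y)
    assert np.allclose(fit_add, fit_mul, atol=1e-4)

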
def test(chosen, label, color):
    # Helper from an example script: fits a GPR on a subset of the data and
    # plots it.  Assumes module-level `kernel`, `X`, `y`, `grid`, and
    # `matplotlib.pyplot as plt`.
    gpr = GaussianProcessRegressor(kernel, normalize_y=True)
    gpr.fit(X[chosen], y[chosen])
    plt.scatter(X[chosen], y[chosen], label=label, color=color)
    plt.plot(grid, gpr.predict(grid), label=label, color=color)
    print(f"RMSE of '{label}' model:",
          np.sqrt(np.mean((gpr.predict(X) - y)**2)))
    # np.linalg.slogdet returns (sign, log|det|); their product is the signed
    # log-determinant, not the determinant itself.
    print(f"{label} log-det: {np.prod(np.linalg.slogdet(kernel(X[chosen])))}")


def test_gpr_fit_duplicate_x(loss):
    '''Training in the presence of duplicate input values.  `loss` is
    supplied by pytest parametrization (decorator not shown).'''

    class Kernel:
        def __init__(self, L):
            self.L = L

        def __call__(self, X, Y=None, eval_gradient=False):
            L = self.L
            d = np.subtract.outer(X, Y if Y is not None else X)
            f = np.exp(-0.5 * d**2 / L**2)
            if eval_gradient is False:
                return f
            else:
                j = np.exp(-0.5 * d**2 / L**2) * d**2 * L**-3
                return f, np.stack((j,), axis=2)

        def diag(self, X):
            return np.ones_like(X)

        @property
        def theta(self):
            return np.log([self.L])

        @theta.setter
        def theta(self, t):
            self.L = np.exp(t[0])

        @property
        def bounds(self):
            return np.log([[0.001, 10.0]])

        def clone_with_theta(self, theta):
            k = Kernel(1.0)
            k.theta = theta
            return k

    X = np.array([0, 1, 1, 2, 3, 3.995, 4, 6, 7, 8, 8.0001, 9])
    y = np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1])
    kernel = Kernel(1.0)
    gpr = GaussianProcessRegressor(kernel=kernel, alpha=0, optimizer=True)
    gpr.fit(X, y, tol=1e-5, loss=loss)
    assert gpr.predict([1]) == pytest.approx(0.5)
    assert gpr.predict([4]) == pytest.approx(1.0)
    assert gpr.predict([8]) == pytest.approx(0.0)


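# Why predict([1]) comes out as 0.5: with an exact duplicate at x = 1 carrying
# conflicting targets (0 and 1), the Gram matrix is singular, and a
# least-squares solve averages the two targets.  A standalone sketch of that
# mechanism using np.linalg.pinv -- an illustration, not necessarily how the
# library solves the system; `_duplicate_average_sketch` is hypothetical.
def _duplicate_average_sketch():
    X = np.array([0.0, 1.0, 1.0, 2.0])
    y = np.array([1.0, 0.0, 1.0, 0.0])
    K = np.exp(-0.5 * np.subtract.outer(X, X)**2)
    w = np.linalg.pinv(K) @ y            # minimum-norm least-squares weights
    k_star = np.exp(-0.5 * (1.0 - X)**2)
    assert np.isclose(k_star @ w, 0.5)   # conflicting targets 0 and 1 average

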
def test_gpr_fit_mle(repeat, verbose):
    '''Fit a function with exactly two periods and check whether the GPR can
    identify the frequency via hyperparameter optimization.  `repeat` and
    `verbose` are supplied by pytest parametrization (decorator not shown).'''

    class Kernel:
        def __init__(self, p, L):
            self.p = p
            self.L = L

        def __call__(self, X, Y=None, eval_gradient=False):
            d = np.subtract.outer(X, Y if Y is not None else X)
            f = np.exp(-2 * np.sin(np.pi / self.p * d)**2 / self.L**2)
            if eval_gradient is False:
                return f
            else:
                s = np.sin(d * np.pi / self.p)
                c = np.cos(d * np.pi / self.p)
                j1 = 2.0 * np.pi * d * 2 * s * c * f / self.p**2 / self.L**2
                j2 = 4.0 * s**2 * f / self.L**3
                return f, np.stack((j1, j2), axis=2)

        def diag(self, X):
            return np.ones_like(X)

        @property
        def theta(self):
            return np.log([self.p, self.L])

        @theta.setter
        def theta(self, t):
            self.p, self.L = np.exp(t)

        @property
        def bounds(self):
            return np.log([[1e-2, 10], [1e-2, 10]])

    X = np.linspace(0, 1, 16, endpoint=False)
    y = np.sin(X * 4 * np.pi)
    kernel = Kernel(0.49, 0.1)
    gpr = GaussianProcessRegressor(kernel=kernel, alpha=1e-10, optimizer=True)
    gpr.fit(X, y, tol=1e-5, repeat=repeat, verbose=verbose)
    assert kernel.p == pytest.approx(0.5, 1e-2)


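# A standalone central-difference check of the Jacobian expressions above.
# Note the convention this sketch assumes, inferred from those expressions:
# j1 and j2 are derivatives with respect to the raw hyperparameters p and L
# (not their logarithms).  `_gradient_check_sketch` is a hypothetical helper.
def _gradient_check_sketch():
    p, L, eps = 0.49, 0.1, 1e-6
    X = np.linspace(0, 1, 8, endpoint=False)
    d = np.subtract.outer(X, X)

    def k(p, L):
        return np.exp(-2 * np.sin(np.pi / p * d)**2 / L**2)

    s, c = np.sin(d * np.pi / p), np.cos(d * np.pi / p)
    f = k(p, L)
    j1 = 2.0 * np.pi * d * 2 * s * c * f / p**2 / L**2   # df/dp
    j2 = 4.0 * s**2 * f / L**3                           # df/dL
    assert np.allclose(j1, (k(p + eps, L) - k(p - eps, L)) / (2 * eps),
                       atol=1e-4)
    assert np.allclose(j2, (k(p, L + eps) - k(p, L - eps)) / (2 * eps),
                       atol=1e-4)

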
def test_gpr_fit_masked_target():
    '''NaN entries in the target vector should be masked out, so that the fit
    matches a fit on the finite entries only.'''

    class Kernel:
        def __call__(self, X, Y=None):
            return np.exp(-np.subtract.outer(X, Y if Y is not None else X)**2)

        def diag(self, X):
            return np.ones_like(X)

    X = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.random.randn(10)
    bad = [1, 4, 7]
    y[bad] = None  # assigning None into a float array stores NaN
    kernel = Kernel()
    gpr = GaussianProcessRegressor(kernel=kernel, alpha=1e-12)
    gpr.fit(X, y)
    assert np.all(np.isfinite(gpr.predict(X)))

    baseline = GaussianProcessRegressor(kernel=kernel, alpha=1e-12)
    baseline.fit(X[~np.isnan(y)], y[~np.isnan(y)])
    grid = np.linspace(-1, 10, 100)
    assert np.allclose(gpr.predict(grid), baseline.predict(grid))


def test_gpr_fit_self_consistency(X, y):
    '''With near-zero regularization, the posterior must interpolate the
    training data with vanishing uncertainty, and predicting before fitting
    must raise.  `X` and `y` are supplied by pytest parametrization
    (decorator not shown).'''

    class Kernel:
        def __call__(self, X, Y=None):
            return np.exp(-np.subtract.outer(X, Y if Y is not None else X)**2)

        def diag(self, X):
            return np.ones_like(X)

    kernel = Kernel()
    gpr = GaussianProcessRegressor(kernel=kernel, alpha=1e-12)
    with pytest.raises(RuntimeError):
        gpr.predict(X)
    gpr.fit(X, y)
    z = gpr.predict(X)
    assert z == pytest.approx(y, 1e-3, 1e-3)
    z, std = gpr.predict(X, return_std=True)
    assert z == pytest.approx(y, 1e-3, 1e-3)
    assert std == pytest.approx(np.zeros_like(y), 1e-3, 1e-3)
    z, cov = gpr.predict(X, return_cov=True)
    assert z == pytest.approx(y, 1e-3, 1e-3)
    assert cov == pytest.approx(np.zeros((len(X), len(X))), 1e-3, 1e-3)


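# Why the posterior variance vanishes at the training inputs as alpha -> 0:
# when predicting at the training points, K* = K** = K, so the predictive
# covariance K** - K* K^-1 K*^T collapses to zero.  A quick standalone check
# of that algebra (`_self_consistency_sketch` is a hypothetical helper):
def _self_consistency_sketch():
    X = np.linspace(-1, 1, 5)
    K = np.exp(-np.subtract.outer(X, X)**2) + 1e-12 * np.eye(5)
    cov = K - K @ np.linalg.solve(K, K)
    assert np.allclose(cov, 0, atol=1e-6)

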
def test_gpr_predict_periodic():
    '''Fit a function with exactly two periods and check whether the GPR can
    use information across the periods to fill in the missing points.'''

    class Kernel:
        def __call__(self, X, Y=None):
            d = np.subtract.outer(X, Y if Y is not None else X)
            return np.exp(-2 * np.sin(np.pi / 0.5 * d)**2)

        def diag(self, X):
            return np.ones_like(X)

    kernel = Kernel()
    X = np.linspace(0, 1, 16, endpoint=False)
    y = np.sin(X * 4 * np.pi)
    mask = np.array([1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1],
                    dtype=np.bool_)
    gpr = GaussianProcessRegressor(kernel=kernel, alpha=1e-10)
    gpr.fit(X[mask], y[mask])
    z = gpr.predict(X[~mask])
    assert z == pytest.approx(y[~mask], 1e-6)


def test_gpr_singular_kernel_matrix():
    '''A singular kernel matrix (all training inputs identical) should still
    yield a finite prediction: the mean of the targets.'''

    class Kernel:
        def __init__(self, L):
            self.L = L

        def __call__(self, X, Y=None, eval_gradient=False):
            L = self.L
            d = np.subtract.outer(X, Y if Y is not None else X)
            f = np.exp(-0.5 * d**2 / L**2)
            if eval_gradient is False:
                return f
            else:
                j = np.exp(-0.5 * d**2 / L**2) * d**2 * L**-3
                return f, np.stack((j,), axis=2)

        def diag(self, X):
            return np.ones_like(X)

        @property
        def theta(self):
            return np.log([self.L])

        @theta.setter
        def theta(self, t):
            self.L = np.exp(t[0])

        def clone_with_theta(self, theta):
            k = Kernel(1.0)
            k.theta = theta
            return k

    X = np.ones(3)
    y = np.random.rand(3)
    gpr = GaussianProcessRegressor(kernel=Kernel(1.0), alpha=0)
    gpr.fit(X, y)
    z = gpr.predict(X)
    assert z == pytest.approx(np.mean(y))


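# The closed form behind the assertion above: with identical inputs the Gram
# matrix is all ones, whose pseudo-inverse is ones((3, 3)) / 9, so the
# least-squares posterior mean collapses to the sample mean of y.  A quick
# standalone check (`_singular_pinv_sketch` is a hypothetical helper):
def _singular_pinv_sketch():
    y = np.random.rand(3)
    K = np.ones((3, 3))
    assert np.allclose(np.linalg.pinv(K), K / 9)
    assert np.allclose(K @ (np.linalg.pinv(K) @ y), np.mean(y))

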
def test_gpr_predict_loocv(f):
    '''Leave-one-out cross-validated predictions must match brute-force
    refitting with each point deleted in turn.  `f` is supplied by pytest
    parametrization (decorator not shown).'''

    class Kernel:
        def __call__(self, X, Y=None):
            return np.exp(-np.subtract.outer(X, Y if Y is not None else X)**2)

        def diag(self, X):
            return np.ones_like(X)

    kernel = Kernel()
    gpr = GaussianProcessRegressor(kernel=kernel, alpha=1e-12)
    X = np.linspace(-1, 1, 6)
    y = f(X)
    y_loocv, std_loocv = gpr.predict_loocv(X, y, return_std=True)
    assert y_loocv == pytest.approx(gpr.predict_loocv(X, y, return_std=False))
    for i, _ in enumerate(X):
        Xi = np.delete(X, i)
        yi = np.delete(y, i)
        gpr_loocv = GaussianProcessRegressor(kernel=kernel, alpha=1e-12)
        gpr_loocv.fit(Xi, yi)
        y_loocv_i, std_loocv_i = gpr_loocv.predict(X[[i]], return_std=True)
        assert y_loocv_i.item() == pytest.approx(y_loocv[i], 1e-7)
        assert std_loocv_i.item() == pytest.approx(std_loocv[i], 1e-7)


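# LOOCV predictions have a well-known closed form (Rasmussen & Williams,
# "Gaussian Processes for Machine Learning", eqs. 5.10-5.12):
#     mu_i = y_i - [K^-1 y]_i / [K^-1]_ii,      sigma_i^2 = 1 / [K^-1]_ii.
# A standalone sketch of that identity, independent of the class above and of
# however predict_loocv is implemented internally; `_loocv_identity_sketch`
# is a hypothetical helper.
def _loocv_identity_sketch(f=np.cos):
    X = np.linspace(-1, 1, 6)
    y = f(X)
    K = np.exp(-np.subtract.outer(X, X)**2) + 1e-12 * np.eye(len(X))
    Kinv = np.linalg.inv(K)
    mu = y - Kinv @ y / np.diag(Kinv)
    var = 1 / np.diag(Kinv)
    # brute force: refit with point i deleted, predict at X[i]
    for i in range(len(X)):
        Xi, yi = np.delete(X, i), np.delete(y, i)
        Ki = np.exp(-np.subtract.outer(Xi, Xi)**2) + 1e-12 * np.eye(len(Xi))
        ks = np.exp(-(X[i] - Xi)**2)
        assert np.isclose(mu[i], ks @ np.linalg.solve(Ki, yi),
                          rtol=1e-5, atol=1e-8)
        assert np.isclose(var[i], 1 - ks @ np.linalg.solve(Ki, ks),
                          rtol=1e-5, atol=1e-8)

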
# AbstractRewriter, MCTSGraphTransformer, and Kernel are assumed to be
# imported or defined elsewhere in the original example script.

# Dummy rewriter that randomly jitters a node's real-valued payload
class RandomJitter(AbstractRewriter):
    def __init__(self, s, n):
        self.s = s  # standard deviation of the jitter
        self.n = n  # number of proposals per expansion

    def __call__(self, x, rng):
        # Propose n Gaussian perturbations of the node value (assumed to be
        # held in x.g), clipped to the interval [0, 3].
        return np.minimum(3, np.maximum(0, rng.normal(x.g, self.s, self.n)))


# Function to be learned
def f(x):
    return np.sin(x) + 2e-4 * x**3 - 2.0 * np.exp(-x**2)
    # return 2 * x


# GPR surrogate model
x = np.linspace(0, 3, 13)
y = f(x)
gpr = GaussianProcessRegressor(kernel=Kernel(0.5))
gpr.fit(x, y)

# Run MCTS
mcts = MCTSGraphTransformer(
    rewriter=RandomJitter(0.333, 9),
    surrogate=gpr,
)
# print(mcts.seek(g0=0.5, target=2.0))
print(mcts.seek(g0=0.5, target=0.0, maxiter=20))
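
# For reference: `Kernel(0.5)` above is not defined in this fragment.  A
# minimal squared-exponential kernel consistent with the tests in this file
# would look like the sketch below (a reconstruction, not the original code;
# it would need to be defined before the GPR is constructed):
#
#     class Kernel:
#         def __init__(self, L):
#             self.L = L
#
#         def __call__(self, X, Y=None):
#             d = np.subtract.outer(X, Y if Y is not None else X)
#             return np.exp(-0.5 * d**2 / self.L**2)
#
#         def diag(self, X):
#             return np.ones_like(X)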