def regression_distance_k(Kx: np.ndarray, Ky: np.ndarray):
    warnings.warn('not tested yet!')
    import gpflow
    from gpflow.kernels import White, Linear
    from gpflow.models import GPR

    T = len(Kx)
    eig_Ky, eiy = truncated_eigen(*eigdec(Ky, min(100, T // 4)))
    eig_Kx, eix = truncated_eigen(*eigdec(Kx, min(100, T // 4)))

    X = eix @ diag(sqrt(eig_Kx))  # X @ X.T is close to K_X
    Y = eiy @ diag(sqrt(eig_Ky))

    n_feats = X.shape[1]
    linear = Linear(n_feats, ARD=True)
    white = White(n_feats)
    gp_model = GPR(X, Y, linear + white)
    gpflow.train.ScipyOptimizer().minimize(gp_model)

    Kx = linear.compute_K_symm(X)
    sigma_squared = white.variance.value

    P = Kx @ pdinv(Kx + sigma_squared * np.eye(T))
    M = P @ Ky @ P
    O = np.ones((T, 1))
    N = O @ np.diag(M)[None, :]  # N[i, j] = M[j, j]; the original `np.diag(M).T` is 1-D and cannot be matrix-multiplied
    D = np.sqrt(N + N.T - 2 * M)
    return D
def residual_kernel(K_Y: np.ndarray, K_X: np.ndarray, use_expectation=True, with_gp=True,
                    sigma_squared=1e-3, return_learned_K_X=False):
    """Kernel matrix of residual of Y given X based on their kernel matrices, Y=f(X)"""
    import gpflow
    from gpflow.kernels import White, Linear
    from gpflow.models import GPR

    K_Y, K_X = centering(K_Y), centering(K_X)
    T = len(K_Y)

    if with_gp:
        eig_Ky, eiy = truncated_eigen(*eigdec(K_Y, min(100, T // 4)))
        eig_Kx, eix = truncated_eigen(*eigdec(K_X, min(100, T // 4)))

        X = eix @ diag(sqrt(eig_Kx))  # X @ X.T is close to K_X
        Y = eiy @ diag(sqrt(eig_Ky))

        n_feats = X.shape[1]
        linear = Linear(n_feats, ARD=True)
        white = White(n_feats)
        gp_model = GPR(X, Y, linear + white)
        gpflow.train.ScipyOptimizer().minimize(gp_model)

        K_X = linear.compute_K_symm(X)
        sigma_squared = white.variance.value

    P = pdinv(np.eye(T) + K_X / sigma_squared)  # == I - K @ inv(K + Sigma) in Zhang et al. 2011
    if use_expectation:  # Flaxman et al. 2016. Gaussian Processes for Independence Tests with Non-iid Data in Causal Inference.
        RK = (K_X + P @ K_Y) @ P
    else:  # Zhang et al. 2011. Kernel-based Conditional Independence Test and Application in Causal Discovery.
        RK = P @ K_Y @ P

    if return_learned_K_X:
        return RK, K_X
    else:
        return RK
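# --- Usage sketch (not part of the original source; illustrative only) ---
# Builds Gaussian (RBF) kernel matrices for toy data with scikit-learn and
# extracts the residual kernel of Y given X. `with_gp=False` keeps the sketch
# independent of the GPflow 1.x regression above, but it still assumes the
# module-level helpers `centering` and `pdinv` used by residual_kernel.
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X_data = rng.randn(200, 2)
Y_data = X_data @ rng.randn(2, 1) + 0.1 * rng.randn(200, 1)

K_X_toy = rbf_kernel(X_data, gamma=0.5)  # Gaussian kernel matrix of X
K_Y_toy = rbf_kernel(Y_data, gamma=0.5)  # Gaussian kernel matrix of Y
RK_toy = residual_kernel(K_Y_toy, K_X_toy, with_gp=False, sigma_squared=1e-3)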
def test_combination_LMC_kernels():
    N, D, P = 100, 3, 2
    kernel_list1 = [Linear(active_dims=[1]), SquaredExponential()]
    L1 = len(kernel_list1)
    kernel_list2 = [SquaredExponential(), Linear(), Linear()]
    L2 = len(kernel_list2)
    k1 = LinearCoregionalization(kernel_list1, np.random.randn(P, L1))
    k2 = LinearCoregionalization(kernel_list2, np.random.randn(P, L2))
    kernel = k1 + k2

    X = np.random.randn(N, D)
    K1 = k1(X, full_cov=True)
    K2 = k2(X, full_cov=True)
    K = kernel(X, full_cov=True)
    assert K.shape == [N, P, N, P]
    np.testing.assert_allclose(K, K1 + K2)
def compute_residual_eig(Y: np.ndarray, Kx: np.ndarray) -> np.ndarray:
    """Residual of Y based on Kx, a kernel matrix of X"""
    assert len(Y) == len(Kx)

    eig_Kx, eix = truncated_eigen(*eigdec(Kx, min(100, len(Kx) // 4)))
    phi_X = eix @ np.diag(np.sqrt(eig_Kx))  # phi_X @ phi_X.T is close to Kx

    n_feats = phi_X.shape[1]
    linear_kernel = Linear(n_feats, ARD=True)
    gp_model = GPR(phi_X, Y, linear_kernel + White(n_feats))
    gp_model.optimize()

    new_Kx = linear_kernel.compute_K_symm(phi_X)
    sigma_squared = gp_model.kern.white.variance.value[0]
    return (pdinv(np.eye(len(Kx)) + new_Kx / sigma_squared) @ Y).squeeze()
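# --- Possible call pattern (illustrative; not from the original source) ---
# Removes the part of a 1-D response Y that a GP can predict from X, given a
# precomputed Gaussian kernel matrix of X. Assumes GPflow 1.x (gp_model.optimize,
# compute_K_symm) and the eigdec/truncated_eigen/pdinv helpers of this module,
# plus scikit-learn for the toy kernel matrix; data and bandwidth are arbitrary.
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

_rng = np.random.RandomState(1)
_X = _rng.randn(150, 2)
_Y = np.sin(_X[:, :1]) + 0.1 * _rng.randn(150, 1)

residual_Y = compute_residual_eig(_Y, rbf_kernel(_X, gamma=0.5))
assert residual_Y.shape == (150,)  # one residual value per observation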
def init_model(self, Model, X, Y):
    """
    Initialize the model.
    TODO: Currently I'm coding in the choice of having purely a combo of Matern and Linear.
    We can make this flexible.
    """
    Dx = X.shape[1]
    kern = Matern52(input_dim=len(self.matern_dims), active_dims=self.matern_dims,
                    lengthscales=SETTINGS.lengthscales * Dx ** 0.5) + \
           Linear(input_dim=len(self.linear_dims), active_dims=self.linear_dims)
    lik = Gaussian()
    lik.variance = SETTINGS.likelihood_variance

    gamma = kmeans2(X, self.M_gamma, minit='points')[0] if self.M_gamma > 0 else np.empty((0, Dx))
    beta = kmeans2(X, self.M_beta, minit='points')[0]

    if self.M_gamma > 0:
        gamma_minibatch_size = SETTINGS.gamma_minibatch_size
    else:
        gamma_minibatch_size = None

    self.model = Model(X, Y, kern, lik, gamma, beta,
                       minibatch_size=SETTINGS.minibatch_size,
                       gamma_minibatch_size=gamma_minibatch_size)
    self.sess = self.model.enquire_session()
def get_linear_kernel(original_X, current_X):
    X_dim = original_X.shape[1]
    Y_dim = current_X.shape[1] - X_dim
    k1 = RBF(input_dim=X_dim, active_dims=list(range(X_dim)))
    if Y_dim > 0:
        k_linear = Linear(input_dim=Y_dim, active_dims=list(range(X_dim, X_dim + Y_dim)))
        return k1 + k_linear
    return k1
def get_linear_input_dependent_kernel(original_X, current_X):
    X_dim = original_X.shape[1]
    Y_dim = current_X.shape[1] - X_dim
    k1 = RBF(input_dim=X_dim, active_dims=list(range(X_dim)))
    if Y_dim > 0:
        k2 = RationalQuadratic(input_dim=X_dim, active_dims=list(range(X_dim)))
        k_linear = Linear(input_dim=Y_dim, active_dims=list(range(X_dim, X_dim + Y_dim)))
        return k1 + k2 * k_linear
    return k1
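# --- Minimal sketch (not from the original source) of using the two builders above ---
# `current_X` is `original_X` with extra output columns appended, so the Linear
# kernel acts only on the appended dimensions. The input_dim keyword suggests the
# GPflow 1.x API, so the kernel matrix is evaluated with compute_K_symm here;
# the shapes and random data are arbitrary.
import numpy as np

original_X = np.random.randn(50, 3)
current_X = np.hstack([original_X, np.random.randn(50, 2)])  # two appended columns

k_plain = get_linear_kernel(original_X, current_X)                       # RBF + Linear
k_input_dep = get_linear_input_dependent_kernel(original_X, current_X)  # RBF + RQ * Linear
K = k_input_dep.compute_K_symm(current_X)  # (50, 50) covariance matrix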
def test_latent_kernels():
    kernel_list = [SquaredExponential(), White(), White() + Linear()]
    multioutput_kernel_list = [
        SharedIndependent(SquaredExponential(), 3),
        SeparateIndependent(kernel_list),
        LinearCoregionalization(kernel_list, np.random.random((5, 3))),
    ]
    assert len(multioutput_kernel_list[0].latent_kernels) == 1
    assert multioutput_kernel_list[1].latent_kernels == tuple(kernel_list)
    assert multioutput_kernel_list[2].latent_kernels == tuple(kernel_list)
def test_kernel() -> None:
    """Tests implementation of the custom white kernel.

    It checks output tensor shapes and values.
    """
    np.random.seed(0)
    x_a = np.random.randint(0, 5, 3)
    x_b = np.random.randint(0, 5, 2)
    x_c = np.random.randint(0, 5, 1)
    x_a = build_and_concat_label_mask(x_a, label=1)
    x_b = build_and_concat_label_mask(x_b, label=2)
    x_c = build_and_concat_label_mask(x_c, label=3)

    # Concatenate signals and sort by x[:, 0]
    x = np.vstack((x_a, x_b, x_c))
    x = tf.convert_to_tensor(x, dtype=tf.float64)
    x2 = x[:-1, :]

    # White kernel
    noise_variances = tf.convert_to_tensor(0.1 * np.array([1.0, 2.0, 3.0]), dtype=tf.float64)
    linear_kernel = Linear(active_dims=[0])
    multi_kernel = MultiWhiteKernel(labels=(1, 2, 3), variances=noise_variances, active_dims=[1])

    n = 3
    variances = tf.zeros((x.shape[0],), dtype=tf.float64)
    for i in range(n):
        mask = tf.cast(tf.equal(x[:, -1], i + 1), tf.float64)
        mask_variances = mask * noise_variances[i]
        variances = variances + mask_variances

    # Test dimensions
    assert np.array_equal(linear_kernel(x).numpy().shape, multi_kernel(x).numpy().shape)
    assert np.array_equal(linear_kernel(x2).numpy().shape, multi_kernel(x2).numpy().shape)
    assert np.array_equal(linear_kernel(x, x2).numpy().shape, multi_kernel(x, x2).numpy().shape)
    assert np.array_equal(linear_kernel(x2, x).numpy().shape, multi_kernel(x2, x).numpy().shape)

    # Test variances
    assert np.linalg.norm(multi_kernel(x).numpy() - np.diag(variances)) < 1e-9
def make_mf_dgp(cls, X, Y, Z, add_linear=True, minibatch_size=None):
    """
    Constructor for convenience. Constructs a mf-dgp model from training data and inducing point locations.

    :param X: List of target
    :param Y:
    :param Z:
    :param add_linear:
    :return:
    """
    n_fidelities = len(X)

    Din = X[0].shape[1]
    Dout = Y[0].shape[1]

    kernels = [RBF(Din, active_dims=list(range(Din)), variance=1., lengthscales=1, ARD=True)]
    for l in range(1, n_fidelities):
        D = Din + Dout
        D_range = list(range(D))
        k_corr = RBF(Din, active_dims=D_range[:Din], lengthscales=1, variance=1.0, ARD=True)
        k_prev = RBF(Dout, active_dims=D_range[Din:], variance=1., lengthscales=1.0)
        k_in = RBF(Din, active_dims=D_range[:Din], variance=1., lengthscales=1, ARD=True)
        if add_linear:
            k_l = k_corr * (k_prev + Linear(Dout, active_dims=D_range[Din:], variance=1.)) + k_in
        else:
            k_l = k_corr * k_prev + k_in
        kernels.append(k_l)

    """
    A White noise kernel is currently expected by Mf-DGP at all layers except the last.
    In cases where no noise is desired, this should be set to 0 and fixed, as follows:

        white = White(1, variance=0.)
        white.variance.trainable = False
        kernels[i] += white
    """
    for i, kernel in enumerate(kernels[:-1]):
        kernels[i] += White(1, variance=1e-6)

    num_data = 0
    for i in range(len(X)):
        _log.info('\nData at Fidelity {}'.format(i + 1))
        _log.info('X - {}'.format(X[i].shape))
        _log.info('Y - {}'.format(Y[i].shape))
        _log.info('Z - {}'.format(Z[i].shape))
        num_data += X[i].shape[0]

    layers = init_layers_mf(Y, Z, kernels, num_outputs=Dout)

    model = DGP_Base(X, Y, Gaussian(), layers, num_samples=10, minibatch_size=minibatch_size)

    return model
X = np.hstack((X, rng.randn(10, 1)))
kernel1 = gpflow.kernels.Coregion(output_dim=output_dim, rank=rank, active_dims=[0])

# compute another kernel with additional inputs,
# make sure our kernel is still okay.
kernel2 = gpflow.kernels.SquaredExponential(active_dims=[1])
kernel_prod = kernel1 * kernel2
K1 = kernel_prod(X)
K2 = kernel1(X) * kernel2(X)  # slicing happens inside kernel
assert np.allclose(K1, K2)


_dim = 3
kernel_setups_extended = (
    kernel_setups
    + [
        SquaredExponential() + Linear(),
        SquaredExponential() * Linear(),
        SquaredExponential() + Linear(variance=rng.rand(_dim)),
    ]
    + [ArcCosine(order=order) for order in ArcCosine.implemented_orders]
)


@pytest.mark.parametrize("kernel", kernel_setups_extended)
@pytest.mark.parametrize("N, dim", [[30, _dim]])
def test_diags(kernel, N, dim):
    X = np.random.randn(N, dim)
    kernel1 = tf.linalg.diag_part(kernel(X, full_cov=True))
    kernel2 = kernel(X, full_cov=False)
    assert np.allclose(kernel1, kernel2)
def initialize_model(x_s, y, m, q, mu=None, s=None, joint=False, infer_type="diag",
                     xi_kern="RBF", x_st_infer=None, x_st_test=None):
    """
    :param use_kronecker:
    :param t: Inputs (i.e. simulator inputs)
    :param x_s: spatiotemporal inputs (just temporal for KO...)
    :type x_s: Iterable of 2D np.ndarrays
    :param y: outputs
    :param m:
    :param q:
    :param joint: if true, then we're training a joint model with 2 channels (output
        dimensions). They are assumed to be provided as column-concatenated in y.
    :param infer_type: How to infer latent variables. Options:
        * "diag": VI with diagonal Gaussian variational posterior
        * "full": VI with full Gaussian variational posterior
        * "mcmc": MCMC particle inference
    :return:
    """
    n_s = np.prod([x_si.shape[0] for x_si in x_s])

    # Providing transformed variables...
    if mu is None:
        if joint:
            y_pca = y[:, :n_s]  # Inputs only
        else:
            y_pca = y
        mu = pca(y_pca, q)
        s = 0.1 * np.ones(mu.shape)
        supervised = False
        train_kl = True
    else:
        supervised = True
        train_kl = False
    if m == mu.shape[0]:
        z = mu
    else:
        z = None

    x = [mu] + x_s
    d_in = [x_i.shape[1] for x_i in x]

    with gpflow.defer_build():
        # X -> Y kernels
        if xi_kern == "Linear":
            kern_list = [Linear(d_in[0], variance=0.01, ARD=True)]
            kern_list[-1].variance.transform = transforms.Logistic(1.0e-12, 1.0)
        elif xi_kern == "RBF":
            kern_list = [RBF(d_in[0], lengthscales=np.sqrt(d_in[0]), ARD=True)]
            kern_list[-1].lengthscales.transform = transforms.Logistic(1.0e-12, 1000.0)
            kern_list[-1].lengthscales.prior = Gamma(mu=2.0, var=1.0)
        elif xi_kern == "Sum":
            kern_list = [
                gpflow.kernels.Sum([
                    RBF(d_in[0], lengthscales=np.sqrt(d_in[0]), ARD=True),
                    Linear(d_in[0], variance=0.01, ARD=True)
                ])
            ]
            kern_list[-1].kernels[0].lengthscales.transform = \
                transforms.Logistic(1.0e-12, 1000.0)
            kern_list[-1].kernels[0].lengthscales.prior = Gamma(mu=2.0, var=1.0)
            kern_list[-1].kernels[1].variance.transform = \
                transforms.Logistic(1.0e-12, 1.0)
        else:
            raise NotImplementedError("Unknown xi kernel {}".format(xi_kern))
        for d_in_i in d_in[1:]:
            kern_list.append(Exponential(d_in_i, lengthscales=np.sqrt(d_in_i), ARD=True))
        for kern in kern_list[1:]:
            kern.lengthscales = 0.1

        # Restructure the inputs for the SGPLVM:
        if joint:
            y_structured = np.concatenate((y[:, :n_s].reshape((-1, 1)),
                                           y[:, n_s:].reshape((-1, 1))), 1)
        else:
            y_structured = y.reshape((-1, 1))

        # Initialize model:
        model_types = {"diag": Sgplvm, "full": SgplvmFullCovInfer}
        if infer_type not in model_types:
            raise NotImplementedError("No support for infer_type {}".format(infer_type))
        else:
            kgplvm = model_types[infer_type]
        model = kgplvm(x, s, y_structured, kern_list, m, z, train_kl=train_kl,
                       x_st_infer=x_st_infer, x_st_test=x_st_test)
        model.likelihood.variance = 1.0e-2 * np.var(y)

    if supervised:
        # Lock provided inputs
        model.X0.trainable = False
        model.h_s.trainable = False

    model.compile()

    return model
#####################################
###### Test Dataset Parameters ######
#####################################

ip = 0.        # Intervention point
dc = 1.0       # Discontinuity
sigma = 0.5    # Standard deviation
sigma_d = 0.   # Value added to the standard deviation after the intervention point
n = 20         # Number of data points

############################
###### Kernel Options ######
############################

Matern = Matern32()
linear_kernel = Linear() + Constant()  # "Linear" kernel
exp_kernel = Exponential()
RBF_kernel = SquaredExponential()

kernel_names = ['Linear', 'Exponential', 'Gaussian', 'Matern', 'BMA']
kernels = [linear_kernel, exp_kernel, RBF_kernel, Matern]

# make a dictionary that zips the kernel names with the corresponding kernel
kernel_dict = dict(zip(kernel_names, kernels))  # making a dictionary of kernels and their corresponding names

###########################################
###### Generation of Test Dataset ######
###########################################

def get_predictors(n):
X = np.hstack((X, rng.randn(10, 1)))
kernel1 = gpflow.kernels.Coregion(output_dim=output_dim, rank=rank, active_dims=[0])

# compute another kernel with additional inputs,
# make sure our kernel is still okay.
kernel2 = gpflow.kernels.SquaredExponential(active_dims=[1])
kernel_prod = kernel1 * kernel2
K1 = kernel_prod(X)
K2 = kernel1(X) * kernel2(X)  # slicing happens inside kernel
assert np.allclose(K1, K2)


_dim = 3
kernel_setups_extended = kernel_setups + [
    SquaredExponential() + Linear(),
    SquaredExponential() * Linear(),
    SquaredExponential() + Linear(ard=True, variance=rng.rand(_dim, 1).reshape(-1))
] + [ArcCosine(order=order) for order in ArcCosine.implemented_orders]


@pytest.mark.parametrize('kernel', kernel_setups_extended)
@pytest.mark.parametrize('N, dim', [[30, _dim]])
def test_diags(kernel, N, dim):
    X = np.random.randn(N, dim)
    kernel1 = tf.linalg.diag_part(kernel(X, full=True))
    kernel2 = kernel(X, full=False)
    assert np.allclose(kernel1, kernel2)
class Container(dict, tf.Module):
    def __init__(self):
        super().__init__()


res = {
    'ckv': list(),
    'ckl': list(),
    'clv': list(),
    'dkv': list(),
    'dkl': list(),
    'dlv': list()
}
kernels = [Linear() + Constant(), RBF(), Matern32(), Exponential()]
SHOW_PLOTS = 1
epochs = 5
container = Container()

for e in range(epochs):
    print(f'epoch {e}')
    np.random.seed(e)
    '''
    n = 100  # Number of data points
    x = np.linspace(-3, 3, n)  # Evenly distributed x values