def test_separate_independent_mok(session_tf):
    """
    We use different independent kernels for each of the output dimensions.
    We can achieve this in two ways:
        1) efficient: SeparateIndependentMok with Shared/SeparateIndependentMof
        2) inefficient: SeparateIndependentMok with InducingPoints
    However, both methods should return the same conditional,
    and after optimization return the same log likelihood.

    Both models start from randomly drawn q_mu / q_sqrt values, so NumPy's
    global RNG is seeded first; every run then optimises from the same
    starting point.

    :param session_tf: passed through to check_equality_predictions.
    """
    # Fixed seed: the np.random.randn draws below are the only source of
    # NumPy randomness in this test.
    np.random.seed(0)

    def _rbf_per_output():
        # One RBF kernel per output, all created with the same settings.
        return [
            RBF(Data.D, variance=0.5, lengthscales=1.2)
            for _ in range(Data.P)
        ]

    def _optimise_q_only(model):
        # Switch training off on the model, then back on for q_sqrt and q_mu,
        # so the optimiser is only given the variational parameters.
        model.set_trainable(False)
        model.q_sqrt.set_trainable(True)
        model.q_mu.set_trainable(True)
        gpflow.training.ScipyOptimizer().minimize(model, maxiter=Data.MAXITER)

    # Model 1 (INefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(
        np.random.randn(Data.M * Data.P, Data.M * Data.P))[None, ...]  # 1 x MP x MP
    kernel_1 = mk.SeparateIndependentMok(_rbf_per_output())
    feature_1 = InducingPoints(Data.X[:Data.M, ...].copy())
    m1 = SVGP(Data.X, Data.Y, kernel_1, Gaussian(), feature_1,
              q_mu=q_mu_1, q_sqrt=q_sqrt_1)
    _optimise_q_only(m1)

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_2 = mk.SeparateIndependentMok(_rbf_per_output())
    feature_2 = mf.SharedIndependentMof(
        InducingPoints(Data.X[:Data.M, ...].copy()))
    m2 = SVGP(Data.X, Data.Y, kernel_2, Gaussian(), feature_2,
              q_mu=q_mu_2, q_sqrt=q_sqrt_2)
    _optimise_q_only(m2)

    check_equality_predictions(session_tf, [m1, m2])
def __init__(self, dim, input_dim=0, kern=None, Z=None, n_ind_pts=100,
             mean_fn=None, Q_diag=None, Umu=None, Ucov_chol=None,
             jitter=gps.numerics.jitter_level, name=None):
    """
    Set up the parameters, inducing features, kernel and mean function.

    :param dim: number of outputs; sizes Q_sqrt, Umu and Ucov_chol and is the
        number of default kernels / default inducing-point sets.
    :param input_dim: added to ``dim`` to give the input dimension of the
        default inducing points and default kernels.
    :param kern: a ``gp.kernels.Kernel`` (wrapped in SharedIndependentMok),
        or a list of kernels (given to SeparateIndependentMok). When falsy,
        ``dim`` ARD Matern32 kernels are created.
    :param Z: None (random inducing points are drawn), a 2-D ndarray (one set
        of inducing points shared via SharedIndependentMof), or anything else
        iterable (one InducingPoints per element, SeparateIndependentMof).
    :param n_ind_pts: number of inducing points; only used when Z is None,
        otherwise it is read from the shape of Z.
    :param mean_fn: mean function; ``mean_fns.Identity(dim)`` when falsy.
    :param Q_diag: if given, Q_sqrt is initialised to ``Q_diag ** 0.5``;
        otherwise to ones.
    :param Umu: initial value of Umu; zeros of shape (dim, n_ind_pts) if None.
    :param Ucov_chol: initial value of Ucov_chol; ``dim`` stacked identity
        matrices if None.
    :param jitter: stored on the instance as ``self.jitter``.
    :param name: forwarded to the parent constructor.
    """
    super().__init__(name=name)
    self.OBSERVATIONS_AS_INPUT = False
    self.dim = dim
    self.input_dim = input_dim
    self.jitter = jitter

    if Q_diag is None:
        q_sqrt_init = np.ones(self.dim)
    else:
        q_sqrt_init = Q_diag ** 0.5
    self.Q_sqrt = Param(q_sqrt_init, transform=gtf.positive)

    # Number of inducing points: taken from Z whenever Z is supplied.
    if Z is None:
        self.n_ind_pts = n_ind_pts
    elif isinstance(Z, list):
        self.n_ind_pts = Z[0].shape[-2]
    else:
        self.n_ind_pts = Z.shape[-2]

    # Inducing features: shared for a single 2-D array, separate otherwise.
    if isinstance(Z, np.ndarray) and Z.ndim == 2:
        self.Z = mf.SharedIndependentMof(gp.features.InducingPoints(Z))
    else:
        if Z is None:
            z_per_output = [
                np.random.randn(self.n_ind_pts, self.dim + self.input_dim)
                for _ in range(self.dim)
            ]
        else:
            z_per_output = list(Z)
        self.Z = mf.SeparateIndependentMof(
            [gp.features.InducingPoints(z) for z in z_per_output])

    # Kernel: one shared kernel, or a list with one kernel per output.
    if isinstance(kern, gp.kernels.Kernel):
        self.kern = mk.SharedIndependentMok(kern, self.dim)
    else:
        kern_list = kern or [
            gp.kernels.Matern32(self.dim + self.input_dim, ARD=True)
            for _ in range(self.dim)
        ]
        self.kern = mk.SeparateIndependentMok(kern_list)

    self.mean_fn = mean_fn or mean_fns.Identity(self.dim)

    if Umu is None:
        umu_init = np.zeros((self.dim, self.n_ind_pts))
    else:
        umu_init = Umu
    self.Umu = Param(umu_init)  # Lm^-1(Umu - m(Z))

    transform = gtf.LowerTriangular(
        self.n_ind_pts, num_matrices=self.dim, squeeze=False)
    if Ucov_chol is None:
        ucov_chol_init = np.tile(
            np.eye(self.n_ind_pts)[None, ...], [self.dim, 1, 1])
    else:
        ucov_chol_init = Ucov_chol
    self.Ucov_chol = Param(ucov_chol_init, transform=transform)  # Lm^-1(Ucov_chol)

    self._Kzz = None
def test_sample_conditional_mixedkernel(session_tf):
    """
    Sample from the conditional along two routes and compare the empirical
    mean and covariance of the samples to one decimal place:
        1) SeparateMixedMok (mixing matrix W) with MixedKernelSharedMof
        2) SeparateIndependentMok with SharedIndependentMof, where the
           samples are multiplied by W.T afterwards

    :param session_tf: session used to evaluate both sample tensors.
    """
    q_mu = np.random.randn(Data.M, Data.L)  # M x L
    q_sqrt = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.L)
    ])  # L x M x M
    inducing_inputs = Data.X[:Data.M, ...]  # M x D
    num_samples = int(10e5)  # 10e5 == 1e6
    # Every test input is the same all-ones point.
    Xs = np.ones((num_samples, Data.D), dtype=float_type)

    values = {"Xnew": Xs, "q_mu": q_mu, "q_sqrt": q_sqrt}
    placeholders = _create_placeholder_dict(values)
    feed_dict = _create_feed_dict(placeholders, values)
    x_ph = placeholders["Xnew"]
    q_mu_ph = placeholders["q_mu"]
    q_sqrt_ph = placeholders["q_sqrt"]

    # Path 1: mixed kernel: most efficient route
    W = np.random.randn(Data.P, Data.L)
    mixed_kernel = mk.SeparateMixedMok(
        [RBF(Data.D) for _ in range(Data.L)], W)
    mixed_feature = mf.MixedKernelSharedMof(
        InducingPoints(inducing_inputs.copy()))
    mixed_sample = sample_conditional(
        x_ph, mixed_feature, mixed_kernel, q_mu_ph,
        q_sqrt=q_sqrt_ph, white=True)
    mixed_values = session_tf.run(mixed_sample, feed_dict=feed_dict)

    # Path 2: independent kernels, mixed later
    separate_kernel = mk.SeparateIndependentMok(
        [RBF(Data.D) for _ in range(Data.L)])
    shared_feature = mf.SharedIndependentMof(
        InducingPoints(inducing_inputs.copy()))
    latent_sample = sample_conditional(
        x_ph, shared_feature, separate_kernel, q_mu_ph,
        q_sqrt=q_sqrt_ph, white=True)
    latent_values = session_tf.run(latent_sample, feed_dict=feed_dict)
    remixed_values = np.matmul(latent_values, W.T)

    # check if mean and covariance of samples are similar
    np.testing.assert_array_almost_equal(
        np.mean(mixed_values, axis=0),
        np.mean(remixed_values, axis=0),
        decimal=1)
    np.testing.assert_array_almost_equal(
        np.cov(mixed_values, rowvar=False),
        np.cov(remixed_values, rowvar=False),
        decimal=1)
def test_separate_independent_mof(session_tf):
    """
    Build three SVGP models on the same data, optimise only q_mu and q_sqrt
    in each, and hand all three to check_equality_predictions:
        1) SharedIndependentMok with plain InducingPoints
        2) SharedIndependentMok with SeparateIndependentMof
        3) SeparateIndependentMok with SeparateIndependentMof
    Every separate feature is a copy of the same first M rows of Data.X.

    :param session_tf: passed through to check_equality_predictions.
    """
    np.random.seed(0)

    def _rbf():
        return RBF(Data.D, variance=0.5, lengthscales=1.2)

    def _inducing_points():
        # A fresh copy of the first M inputs.
        return InducingPoints(Data.X[:Data.M, ...].copy())

    def _random_q_sqrt_per_output():
        # P x M x M
        return np.array([
            np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
        ])

    def _optimise_q_only(model):
        # Training is switched off on the model and back on for q_sqrt and
        # q_mu before the optimiser runs.
        model.set_trainable(False)
        model.q_sqrt.set_trainable(True)
        model.q_mu.set_trainable(True)
        gpflow.training.ScipyOptimizer().minimize(model, maxiter=Data.MAXITER)

    # Model 1 (INefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(
        np.random.randn(Data.M * Data.P, Data.M * Data.P))[None, ...]  # 1 x MP x MP
    kernel_1 = mk.SharedIndependentMok(_rbf(), Data.P)
    feature_1 = _inducing_points()
    m1 = SVGP(Data.X, Data.Y, kernel_1, Gaussian(), feature_1,
              q_mu=q_mu_1, q_sqrt=q_sqrt_1)
    _optimise_q_only(m1)

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = _random_q_sqrt_per_output()
    kernel_2 = mk.SharedIndependentMok(_rbf(), Data.P)
    feature_2 = mf.SeparateIndependentMof(
        [_inducing_points() for _ in range(Data.P)])
    m2 = SVGP(Data.X, Data.Y, kernel_2, Gaussian(), feature_2,
              q_mu=q_mu_2, q_sqrt=q_sqrt_2)
    _optimise_q_only(m2)

    # Model 3 (Inefficient): an identical feature is used P times,
    # and treated as a separate feature.
    q_mu_3 = np.random.randn(Data.M, Data.P)
    q_sqrt_3 = _random_q_sqrt_per_output()
    kernel_3 = mk.SeparateIndependentMok([_rbf() for _ in range(Data.P)])
    feature_3 = mf.SeparateIndependentMof(
        [_inducing_points() for _ in range(Data.P)])
    m3 = SVGP(Data.X, Data.Y, kernel_3, Gaussian(), feature_3,
              q_mu=q_mu_3, q_sqrt=q_sqrt_3)
    _optimise_q_only(m3)

    check_equality_predictions(session_tf, [m1, m2, m3])
''' seperate kernels in the output with share indepenent features # Create list of kernels for each output# Create kern_list = [gp.kernels.RBF(input_dim=dimX, active_dims=len_sc, ARD=True) for _ in range(dimY)] # Create multioutput kernel from kernel list kernel = mk.SeparateIndependentMok(kern_list) feature = mf.SharedIndependentMof(gp.features.InducingPoints(Z)) f_name += "_SepKernShaFeat_"+str(N1) ''' '''Separate Independent Kernel & Separate Independent Features''' # Create list of kernels for each output kern_list = [ gp.kernels.Matern32(input_dim=dimX, active_dims=len_sc, ARD=True) for _ in range(dimY) ] # Create multioutput kernel from kernel list kernel = mk.SeparateIndependentMok(kern_list) # initialisation of inducing input locations, one set of locations per output xtrain = np.array(xtrain) Zs = [xtrain[np.random.permutation(N1)[:M]].copy() for _ in range(dimY)] # initialise as list inducing features feature_list = [gp.features.InducingPoints(Z) for Z in Zs] # create multioutput features from feature_list feature = mf.SeparateIndependentMof(feature_list) f_name += "_SepKernSepFeat_Matern32" + str(N1) # create SVGP model as usual and optimize model = gp.models.SVGP(xtrain, ytrain, kernel, gp.likelihoods.Gaussian(), feat=feature,
def __init__(self, latent_dim, Y, inputs=None, emissions=None,
             px1_mu=None, px1_cov=None,
             kern=None, Z=None, n_ind_pts=100, mean_fn=None,
             Q_diag=None, Umu=None, Ucov_chol=None,
             qx1_mu=None, qx1_cov=None, As=None, bs=None, Ss=None,
             n_samples=100, seed=None, parallel_iterations=10,
             jitter=gps.numerics.jitter_level, name=None):
    """
    Store the data and create every parameter of the model.

    :param latent_dim: dimension of the latent state; sizes most parameters.
    :param Y: observations; ``Y.shape`` is unpacked into (T, obs_dim). Stored
        as a non-trainable Param.
    :param inputs: optional inputs, stored as a non-trainable Param; its
        ``shape[1]`` becomes ``input_dim`` (0 when None).
    :param emissions: emission model; a GaussianEmissions(latent_dim, obs_dim)
        is created when falsy.
    :param px1_mu: non-trainable px1_mu value; zeros(latent_dim) if None.
    :param px1_cov: if None, px1_cov_chol is None. A 1-D array is stored as
        its element-wise square root, anything else as its Cholesky factor.
    :param kern: a ``gp.kernels.Kernel`` (SharedIndependentMok), a list of
        kernels (SeparateIndependentMok), or falsy for ``latent_dim`` ARD
        Matern32 kernels.
    :param Z: None (random inducing points), a 2-D ndarray (shared via
        SharedIndependentMof), or an iterable with one entry per
        InducingPoints feature (SeparateIndependentMof).
    :param n_ind_pts: number of inducing points; only used when Z is None.
    :param mean_fn: mean function; ``mean_fns.Identity(latent_dim)`` if falsy.
    :param Q_diag: Q_sqrt is initialised to ``Q_diag**0.5``, or ones if None.
    :param Umu: initial Umu; zeros (latent_dim, n_ind_pts) if None.
    :param Ucov_chol: initial Ucov_chol; stacked identities if None.
    :param qx1_mu: initial qx1_mu; zeros(latent_dim) if None.
    :param qx1_cov: qx1_cov_chol is its Cholesky factor, or identity if None.
    :param As: initial As; ones (T - 1, latent_dim) if None.
    :param bs: initial bs; zeros (T - 1, latent_dim) if None.
    :param Ss: ``False`` sets ``_S_chols`` to None and creates no parameter.
        None tiles the current Q_sqrt value T - 1 times; a 2-D array is
        stored as its square root; otherwise the Cholesky factor is stored
        under a lower-triangular transform.
    :param n_samples: stored; also sizes the NaN placeholder samples.
    :param seed: stored on the instance.
    :param parallel_iterations: stored on the instance.
    :param jitter: stored on the instance.
    :param name: forwarded to the parent constructor.
    """
    super().__init__(name=name)
    self.latent_dim = latent_dim
    self.T, self.obs_dim = Y.shape
    self.Y = Param(Y, trainable=False)

    if inputs is None:
        self.inputs = None
        self.input_dim = 0
    else:
        self.inputs = Param(inputs, trainable=False)
        self.input_dim = self.inputs.shape[1]

    if qx1_mu is None:
        qx1_mu_init = np.zeros(self.latent_dim)
    else:
        qx1_mu_init = qx1_mu
    self.qx1_mu = Param(qx1_mu_init)

    if qx1_cov is None:
        qx1_chol_init = np.eye(self.latent_dim)
    else:
        qx1_chol_init = np.linalg.cholesky(qx1_cov)
    self.qx1_cov_chol = Param(
        qx1_chol_init,
        transform=gtf.LowerTriangular(self.latent_dim, squeeze=True))

    if As is None:
        As_init = np.ones((self.T - 1, self.latent_dim))
    else:
        As_init = As
    self.As = Param(As_init)

    if bs is None:
        bs_init = np.zeros((self.T - 1, self.latent_dim))
    else:
        bs_init = bs
    self.bs = Param(bs_init)

    if Q_diag is None:
        q_sqrt_init = np.ones(self.latent_dim)
    else:
        q_sqrt_init = Q_diag**0.5
    self.Q_sqrt = Param(q_sqrt_init, transform=gtf.positive)

    if Ss is False:
        self._S_chols = None
    else:
        if Ss is None:
            # Start from T - 1 copies of the current Q_sqrt value.
            s_chols_init = np.tile(
                self.Q_sqrt.value.copy()[None, ...], [self.T - 1, 1])
            s_chols_transform = gtf.positive
        elif Ss.ndim == 2:
            s_chols_init = np.sqrt(Ss)
            s_chols_transform = gtf.positive
        else:
            s_chols_init = np.linalg.cholesky(Ss)
            s_chols_transform = gtf.LowerTriangular(
                self.latent_dim, num_matrices=self.T - 1, squeeze=False)
        self.S_chols = Param(s_chols_init, transform=s_chols_transform)

    self.emissions = emissions or GaussianEmissions(
        latent_dim=self.latent_dim, obs_dim=self.obs_dim)

    if px1_mu is None:
        px1_mu_init = np.zeros(self.latent_dim)
    else:
        px1_mu_init = px1_mu
    self.px1_mu = Param(px1_mu_init, trainable=False)

    if px1_cov is None:
        self.px1_cov_chol = None
    elif px1_cov.ndim == 1:
        self.px1_cov_chol = Param(
            np.sqrt(px1_cov), trainable=False, transform=gtf.positive)
    else:
        self.px1_cov_chol = Param(
            np.linalg.cholesky(px1_cov), trainable=False,
            transform=gtf.LowerTriangular(self.latent_dim, squeeze=True))

    self.n_samples = n_samples
    self.seed = seed
    self.parallel_iterations = parallel_iterations
    self.jitter = jitter

    # Inference-specific attributes (see gpssm_models.py for appropriate choices):
    # the defaults installed here only produce NaNs.
    nans = tf.constant(
        np.full((self.T, self.n_samples, self.latent_dim), np.nan),
        dtype=gps.float_type)
    self.sample_fn = lambda **kwargs: (nans, None)
    self.sample_kwargs = {}
    self.KL_fn = lambda *fs: tf.constant(np.nan, dtype=gps.float_type)

    # GP Transitions:
    if Z is None:
        self.n_ind_pts = n_ind_pts
    elif isinstance(Z, list):
        self.n_ind_pts = Z[0].shape[-2]
    else:
        self.n_ind_pts = Z.shape[-2]

    if isinstance(Z, np.ndarray) and Z.ndim == 2:
        self.Z = mf.SharedIndependentMof(gp.features.InducingPoints(Z))
    else:
        if Z is None:
            z_per_output = [
                np.random.randn(self.n_ind_pts,
                                self.latent_dim + self.input_dim)
                for _ in range(self.latent_dim)
            ]
        else:
            z_per_output = list(Z)
        self.Z = mf.SeparateIndependentMof(
            [gp.features.InducingPoints(z) for z in z_per_output])

    if isinstance(kern, gp.kernels.Kernel):
        self.kern = mk.SharedIndependentMok(kern, self.latent_dim)
    else:
        kern_list = kern or [
            gp.kernels.Matern32(self.latent_dim + self.input_dim, ARD=True)
            for _ in range(self.latent_dim)
        ]
        self.kern = mk.SeparateIndependentMok(kern_list)

    self.mean_fn = mean_fn or mean_fns.Identity(self.latent_dim)

    if Umu is None:
        umu_init = np.zeros((self.latent_dim, self.n_ind_pts))
    else:
        umu_init = Umu
    self.Umu = Param(umu_init)  # (Lm^-1)(Umu - m(Z))

    LT_transform = gtf.LowerTriangular(
        self.n_ind_pts, num_matrices=self.latent_dim, squeeze=False)
    if Ucov_chol is None:
        ucov_chol_init = np.tile(
            np.eye(self.n_ind_pts)[None, ...], [self.latent_dim, 1, 1])
    else:
        ucov_chol_init = Ucov_chol
    self.Ucov_chol = Param(ucov_chol_init, transform=LT_transform)  # (Lm^-1)Lu

    self._Kzz = None
def separate_independent(self, num=Datum.L):
    """
    Return a SeparateIndependentMok wrapping the result of make_kernels(num).

    :param num: argument forwarded to make_kernels; defaults to Datum.L.
    """
    kernels = make_kernels(num)
    return mk.SeparateIndependentMok(kernels)