def test_MixedKernelSeparateMof(session_tf):
    """
    Build two SVGP models that differ only in the multi-output feature
    wrapper (`SeparateIndependentMof` vs. `MixedKernelSeparateMof`) and check
    that their predictions agree.
    """
    data = DataMixedKernel

    def build_model(feature_wrapper):
        # Every model gets its own freshly created kernels and inducing
        # points (copied from the first M inputs), so nothing is shared
        # between the two models under comparison.
        kernels = [RBF(data.D) for _ in range(data.L)]
        inducing = [
            InducingPoints(data.X[:data.M, ...].copy())
            for _ in range(data.L)
        ]
        kernel = mk.SeparateMixedMok(kernels, W=data.W)
        feature = feature_wrapper(inducing)
        return SVGP(
            data.X,
            data.Y,
            kernel,
            Gaussian(),
            feat=feature,
            q_mu=data.mu_data,
            q_sqrt=data.sqrt_data,
        )

    model_separate = build_model(mf.SeparateIndependentMof)
    model_mixed = build_model(mf.MixedKernelSeparateMof)

    check_equality_predictions(session_tf, [model_separate, model_mixed])
def __init__(self, dim, input_dim=0, kern=None, Z=None, n_ind_pts=100,
             mean_fn=None, Q_diag=None, Umu=None, Ucov_chol=None,
             jitter=gps.numerics.jitter_level, name=None):
    """
    Set up the parameters, inducing features, kernel and mean function.

    :param dim: stored as `self.dim`; used as the number of separate
        kernels / inducing features and in the default parameter shapes.
    :param input_dim: stored as `self.input_dim`; added to `dim` to size the
        default inducing points and the default kernels.
    :param kern: a single `gp.kernels.Kernel` (wrapped in a
        `SharedIndependentMok`), or a list of kernels (wrapped in a
        `SeparateIndependentMok`). A falsy value selects one ARD Matern32
        kernel per dimension.
    :param Z: inducing inputs. A 2-D `np.ndarray` is shared across outputs;
        anything else is iterated to give one set per entry. `None` draws
        standard-normal inducing inputs.
    :param n_ind_pts: number of inducing points, only used when `Z` is None;
        otherwise it is read from the second-to-last axis of `Z`.
    :param mean_fn: mean function; a falsy value selects
        `mean_fns.Identity(dim)`.
    :param Q_diag: if given, its element-wise square root initialises
        `Q_sqrt`; otherwise `Q_sqrt` starts as ones.
    :param Umu: initial value of the `Umu` parameter (zeros by default).
    :param Ucov_chol: initial value of the `Ucov_chol` parameter (a stack of
        identity matrices by default).
    :param jitter: stored as `self.jitter`.
    :param name: forwarded to the parent constructor.
    """
    super().__init__(name=name)
    self.OBSERVATIONS_AS_INPUT = False
    self.dim = dim
    self.input_dim = input_dim
    self.jitter = jitter

    if Q_diag is None:
        q_sqrt_init = np.ones(self.dim)
    else:
        q_sqrt_init = Q_diag ** 0.5
    self.Q_sqrt = Param(q_sqrt_init, transform=gtf.positive)

    # The number of inducing points is dictated by Z whenever Z is supplied.
    if Z is None:
        self.n_ind_pts = n_ind_pts
    elif isinstance(Z, list):
        self.n_ind_pts = Z[0].shape[-2]
    else:
        self.n_ind_pts = Z.shape[-2]

    # Inducing features: one shared set for a 2-D array, otherwise one set
    # per entry of Z (or per dimension when drawing random defaults).
    if isinstance(Z, np.ndarray) and Z.ndim == 2:
        self.Z = mf.SharedIndependentMof(gp.features.InducingPoints(Z))
    else:
        if Z is None:
            Z_list = [
                np.random.randn(self.n_ind_pts, self.dim + self.input_dim)
                for _ in range(self.dim)
            ]
        else:
            Z_list = list(Z)
        inducing = [gp.features.InducingPoints(z) for z in Z_list]
        self.Z = mf.SeparateIndependentMof(inducing)

    # Kernel: a single kernel is shared, otherwise one kernel per dimension.
    if isinstance(kern, gp.kernels.Kernel):
        self.kern = mk.SharedIndependentMok(kern, self.dim)
    else:
        if kern:
            kern_list = kern
        else:
            kern_list = [
                gp.kernels.Matern32(self.dim + self.input_dim, ARD=True)
                for _ in range(self.dim)
            ]
        self.kern = mk.SeparateIndependentMok(kern_list)

    self.mean_fn = mean_fn or mean_fns.Identity(self.dim)

    # Lm^-1(Umu - m(Z))
    if Umu is None:
        umu_init = np.zeros((self.dim, self.n_ind_pts))
    else:
        umu_init = Umu
    self.Umu = Param(umu_init)

    # Lm^-1(Ucov_chol)
    transform = gtf.LowerTriangular(self.n_ind_pts, num_matrices=self.dim,
                                    squeeze=False)
    if Ucov_chol is None:
        identity = np.eye(self.n_ind_pts)[None, ...]
        ucov_chol_init = np.tile(identity, [self.dim, 1, 1])
    else:
        ucov_chol_init = Ucov_chol
    self.Ucov_chol = Param(ucov_chol_init, transform=transform)

    self._Kzz = None
def test_separate_independent_mof(session_tf):
    """
    Fit three SVGP models that use the same RBF kernel settings and the same
    inducing locations, but package kernels and inducing features in
    different ways, and check that their predictions agree.

    Only `q_mu` and `q_sqrt` are optimised in each model.
    """
    np.random.seed(0)

    def make_rbf():
        return RBF(Data.D, variance=0.5, lengthscales=1.2)

    def make_inducing_points():
        return InducingPoints(Data.X[:Data.M, ...].copy())

    def random_tril_stack():
        # P x M x M stack of random lower-triangular matrices.
        blocks = [
            np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
        ]
        return np.array(blocks)

    def fit_variational_params(model):
        # Freeze everything, then release only the variational parameters.
        model.set_trainable(False)
        model.q_sqrt.set_trainable(True)
        model.q_mu.set_trainable(True)
        gpflow.training.ScipyOptimizer().minimize(
            model, maxiter=Data.MAXITER)

    # Model 1 (inefficient): a plain feature with a full MP x MP covariance.
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    full_size = Data.M * Data.P
    q_sqrt_1 = np.tril(np.random.randn(full_size, full_size))[None, ...]  # 1 x MP x MP
    kernel_1 = mk.SharedIndependentMok(make_rbf(), Data.P)
    feature_1 = make_inducing_points()
    m1 = SVGP(Data.X, Data.Y, kernel_1, Gaussian(), feature_1,
              q_mu=q_mu_1, q_sqrt=q_sqrt_1)
    fit_variational_params(m1)

    # Model 2 (efficient): shared kernel, separate features per output.
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = random_tril_stack()
    kernel_2 = mk.SharedIndependentMok(make_rbf(), Data.P)
    feat_list_2 = [make_inducing_points() for _ in range(Data.P)]
    feature_2 = mf.SeparateIndependentMof(feat_list_2)
    m2 = SVGP(Data.X, Data.Y, kernel_2, Gaussian(), feature_2,
              q_mu=q_mu_2, q_sqrt=q_sqrt_2)
    fit_variational_params(m2)

    # Model 3 (inefficient): an identical feature is used P times and
    # treated as a separate feature, together with P separate kernels.
    q_mu_3 = np.random.randn(Data.M, Data.P)
    q_sqrt_3 = random_tril_stack()
    kern_list = [make_rbf() for _ in range(Data.P)]
    kernel_3 = mk.SeparateIndependentMok(kern_list)
    feat_list_3 = [make_inducing_points() for _ in range(Data.P)]
    feature_3 = mf.SeparateIndependentMof(feat_list_3)
    m3 = SVGP(Data.X, Data.Y, kernel_3, Gaussian(), feature_3,
              q_mu=q_mu_3, q_sqrt=q_sqrt_3)
    fit_variational_params(m3)

    check_equality_predictions(session_tf, [m1, m2, m3])
''' '''Separate Independent Kernel & Separate Independent Features''' # Create list of kernels for each output kern_list = [ gp.kernels.Matern32(input_dim=dimX, active_dims=len_sc, ARD=True) for _ in range(dimY) ] # Create multioutput kernel from kernel list kernel = mk.SeparateIndependentMok(kern_list) # initialisation of inducing input locations, one set of locations per output xtrain = np.array(xtrain) Zs = [xtrain[np.random.permutation(N1)[:M]].copy() for _ in range(dimY)] # initialise as list inducing features feature_list = [gp.features.InducingPoints(Z) for Z in Zs] # create multioutput features from feature_list feature = mf.SeparateIndependentMof(feature_list) f_name += "_SepKernSepFeat_Matern32" + str(N1) # create SVGP model as usual and optimize model = gp.models.SVGP(xtrain, ytrain, kernel, gp.likelihoods.Gaussian(), feat=feature, whiten=False) model.likelihood.variance = 1e-10 model.likelihood.variance.trainable = False opt = gp.train.ScipyOptimizer(tol=1e-12, options={
def __init__(self, latent_dim, Y, inputs=None, emissions=None,
             px1_mu=None, px1_cov=None, kern=None, Z=None, n_ind_pts=100,
             mean_fn=None, Q_diag=None, Umu=None, Ucov_chol=None,
             qx1_mu=None, qx1_cov=None, As=None, bs=None, Ss=None,
             n_samples=100, seed=None, parallel_iterations=10,
             jitter=gps.numerics.jitter_level, name=None):
    """
    Create all parameters of the model and the GP transition components.

    :param latent_dim: stored as `self.latent_dim`; sizes most defaults.
    :param Y: 2-D array whose shape is unpacked into `(T, obs_dim)`; stored
        as a non-trainable parameter.
    :param inputs: optional array stored as a non-trainable parameter; its
        second axis gives `self.input_dim` (0 when absent).
    :param emissions: emission model; a falsy value selects
        `GaussianEmissions(latent_dim, obs_dim)`.
    :param px1_mu: non-trainable parameter, zeros by default.
    :param px1_cov: if None, `px1_cov_chol` is None. A 1-D array is stored
        as its element-wise square root, anything else as its Cholesky
        factor; both are non-trainable.
    :param kern: a single `gp.kernels.Kernel` (shared) or a list of kernels
        (separate); a falsy value selects one ARD Matern32 per latent
        dimension.
    :param Z: inducing inputs. A 2-D `np.ndarray` is shared; anything else
        is iterated to give one set per entry; `None` draws standard-normal
        values.
    :param n_ind_pts: number of inducing points, used only when `Z` is None.
    :param mean_fn: mean function; a falsy value selects
        `mean_fns.Identity(latent_dim)`.
    :param Q_diag: if given, its square root initialises `Q_sqrt`
        (ones otherwise).
    :param Umu: initial value of `Umu` (zeros by default).
    :param Ucov_chol: initial value of `Ucov_chol` (stacked identities by
        default).
    :param qx1_mu: initial value of `qx1_mu` (zeros by default).
    :param qx1_cov: if given, its Cholesky factor initialises
        `qx1_cov_chol` (identity otherwise).
    :param As: initial value of `As` (ones of shape (T - 1, latent_dim) by
        default).
    :param bs: initial value of `bs` (zeros of the same shape by default).
    :param Ss: `False` creates no `S_chols` parameter and sets `_S_chols` to
        None. `None` initialises `S_chols` by tiling the current `Q_sqrt`
        value. A 2-D array is stored as its element-wise square root; any
        other array as its Cholesky factor with a lower-triangular transform.
    :param n_samples: stored as `self.n_samples`.
    :param seed: stored as `self.seed`.
    :param parallel_iterations: stored as `self.parallel_iterations`.
    :param jitter: stored as `self.jitter`.
    :param name: forwarded to the parent constructor.
    """
    super().__init__(name=name)
    self.latent_dim = latent_dim
    self.T, self.obs_dim = Y.shape
    self.Y = Param(Y, trainable=False)

    if inputs is None:
        self.inputs = None
        self.input_dim = 0
    else:
        self.inputs = Param(inputs, trainable=False)
        self.input_dim = self.inputs.shape[1]

    if qx1_mu is None:
        qx1_mu_init = np.zeros(self.latent_dim)
    else:
        qx1_mu_init = qx1_mu
    self.qx1_mu = Param(qx1_mu_init)

    if qx1_cov is None:
        qx1_chol_init = np.eye(self.latent_dim)
    else:
        qx1_chol_init = np.linalg.cholesky(qx1_cov)
    self.qx1_cov_chol = Param(
        qx1_chol_init,
        transform=gtf.LowerTriangular(self.latent_dim, squeeze=True))

    if As is None:
        As_init = np.ones((self.T - 1, self.latent_dim))
    else:
        As_init = As
    self.As = Param(As_init)

    if bs is None:
        bs_init = np.zeros((self.T - 1, self.latent_dim))
    else:
        bs_init = bs
    self.bs = Param(bs_init)

    if Q_diag is None:
        q_sqrt_init = np.ones(self.latent_dim)
    else:
        q_sqrt_init = Q_diag**0.5
    self.Q_sqrt = Param(q_sqrt_init, transform=gtf.positive)

    if Ss is False:
        self._S_chols = None
    else:
        if Ss is None:
            # Default: start every step from the current Q_sqrt value.
            s_init = np.tile(self.Q_sqrt.value.copy()[None, ...],
                             [self.T - 1, 1])
            s_transform = gtf.positive
        elif Ss.ndim == 2:
            s_init = np.sqrt(Ss)
            s_transform = gtf.positive
        else:
            s_init = np.linalg.cholesky(Ss)
            s_transform = gtf.LowerTriangular(
                self.latent_dim, num_matrices=self.T - 1, squeeze=False)
        self.S_chols = Param(s_init, transform=s_transform)

    self.emissions = emissions or GaussianEmissions(
        latent_dim=self.latent_dim, obs_dim=self.obs_dim)

    if px1_mu is None:
        px1_mu_init = np.zeros(self.latent_dim)
    else:
        px1_mu_init = px1_mu
    self.px1_mu = Param(px1_mu_init, trainable=False)

    if px1_cov is None:
        self.px1_cov_chol = None
    elif px1_cov.ndim == 1:
        self.px1_cov_chol = Param(np.sqrt(px1_cov), trainable=False,
                                  transform=gtf.positive)
    else:
        self.px1_cov_chol = Param(
            np.linalg.cholesky(px1_cov), trainable=False,
            transform=gtf.LowerTriangular(self.latent_dim, squeeze=True))

    self.n_samples = n_samples
    self.seed = seed
    self.parallel_iterations = parallel_iterations
    self.jitter = jitter

    # Inference-specific attributes (see gpssm_models.py for appropriate
    # choices). The placeholders below only produce NaNs.
    nan_samples = np.zeros(
        (self.T, self.n_samples, self.latent_dim)) * np.nan
    nans = tf.constant(nan_samples, dtype=gps.float_type)
    self.sample_fn = lambda **kwargs: (nans, None)
    self.sample_kwargs = {}
    self.KL_fn = lambda *fs: tf.constant(np.nan, dtype=gps.float_type)

    # GP Transitions:
    # The number of inducing points is dictated by Z whenever Z is supplied.
    if Z is None:
        self.n_ind_pts = n_ind_pts
    elif isinstance(Z, list):
        self.n_ind_pts = Z[0].shape[-2]
    else:
        self.n_ind_pts = Z.shape[-2]

    if isinstance(Z, np.ndarray) and Z.ndim == 2:
        self.Z = mf.SharedIndependentMof(gp.features.InducingPoints(Z))
    else:
        if Z is None:
            Z_list = [
                np.random.randn(self.n_ind_pts,
                                self.latent_dim + self.input_dim)
                for _ in range(self.latent_dim)
            ]
        else:
            Z_list = list(Z)
        inducing = [gp.features.InducingPoints(z) for z in Z_list]
        self.Z = mf.SeparateIndependentMof(inducing)

    if isinstance(kern, gp.kernels.Kernel):
        self.kern = mk.SharedIndependentMok(kern, self.latent_dim)
    else:
        if kern:
            kern_list = kern
        else:
            kern_list = [
                gp.kernels.Matern32(self.latent_dim + self.input_dim,
                                    ARD=True)
                for _ in range(self.latent_dim)
            ]
        self.kern = mk.SeparateIndependentMok(kern_list)

    self.mean_fn = mean_fn or mean_fns.Identity(self.latent_dim)

    # (Lm^-1)(Umu - m(Z))
    if Umu is None:
        umu_init = np.zeros((self.latent_dim, self.n_ind_pts))
    else:
        umu_init = Umu
    self.Umu = Param(umu_init)

    # (Lm^-1)Lu
    LT_transform = gtf.LowerTriangular(self.n_ind_pts,
                                       num_matrices=self.latent_dim,
                                       squeeze=False)
    if Ucov_chol is None:
        identity = np.eye(self.n_ind_pts)[None, ...]
        ucov_chol_init = np.tile(identity, [self.latent_dim, 1, 1])
    else:
        ucov_chol_init = Ucov_chol
    self.Ucov_chol = Param(ucov_chol_init, transform=LT_transform)

    self._Kzz = None
def separate_independent(self, num=Datum.P):
    """
    Return a `SeparateIndependentMof` wrapping the result of
    `make_ips(num)`.

    :param num: forwarded to `make_ips`; defaults to `Datum.P`.
    """
    inducing_features = make_ips(num)
    return mf.SeparateIndependentMof(inducing_features)