def construct_mean_function(X: np.ndarray, D_in: int, D_out: int) -> gpflow.mean_functions.MeanFunction:
    """
    Return an :class:`gpflow.mean_functions.Identity` when ``D_in == D_out``;
    otherwise build a fixed :class:`~gpflow.mean_functions.Linear` mean
    function from a projection matrix ``W`` derived from ``X``.

    .. note:: The returned mean function is made untrainable. Use
        :meth:`gpflow.set_trainable` to change that.

    :param X: Data array of shape ``[N, D_in]``, used to compute the principal
        components for the linear map when ``D_in != D_out``.
    :param D_in: Input dimensionality (typically ``X.shape[-1]``).
    :param D_out: Output dimensionality (e.g. number of latent GPs).
    """
    assert X.shape[-1] == D_in
    if D_in == D_out:
        mean_function = gpflow.mean_functions.Identity()
        gpflow.set_trainable(mean_function, False)
        return mean_function

    if D_in > D_out:
        # Stepping down: project onto the leading D_out right-singular vectors.
        _, _, V = np.linalg.svd(X, full_matrices=False)
        W = V[:D_out, :].T
    else:
        # Stepping up: identity on the first D_in dims, zero-pad the rest.
        W = np.concatenate([np.eye(D_in), np.zeros((D_in, D_out - D_in))], axis=1)

    assert W.shape == (D_in, D_out)
    mean_function = gpflow.mean_functions.Linear(W)
    gpflow.set_trainable(mean_function, False)
    return mean_function
def build_model(data):
    """
    Construct a GPflow GPR wrapped in a ``GPflowModelConfig``.

    The Matern-5/2 kernel variance is seeded from the empirical variance of
    the observations; LogNormal priors are placed on the kernel variance and
    lengthscales, and the likelihood noise is fixed (untrainable) at 1e-5.
    """
    empirical_variance = tf.math.reduce_variance(data.observations)
    kernel = gpflow.kernels.Matern52(
        variance=empirical_variance, lengthscales=[0.2, 0.2]
    )
    scale = tf.cast(1.0, dtype=tf.float64)
    loc = tf.cast(-2.0, dtype=tf.float64)
    kernel.variance.prior = tfp.distributions.LogNormal(loc, scale)
    kernel.lengthscales.prior = tfp.distributions.LogNormal(
        tf.math.log(kernel.lengthscales), scale
    )
    model = gpflow.models.GPR(data.astuple(), kernel, noise_variance=1e-5)
    # The noise level is treated as known (tiny), so freeze the likelihood.
    gpflow.set_trainable(model.likelihood, False)
    return GPflowModelConfig(
        **{
            "model": model,
            "model_args": {"num_kernel_samples": 100},
            "optimizer": gpflow.optimizers.Scipy(),
            "optimizer_args": {"minimize_args": {"options": dict(maxiter=100)}},
        }
    )
def init_layers_linear(X, Y, Z, kernels, layer_sizes,
                       mean_function=None,
                       num_outputs=None,
                       Layer=SVGPLayer,
                       whiten=False):
    """
    Build the layers of a deep GP, propagating inputs through fixed linear maps.

    For every hidden layer the mean function is Identity when the input and
    output dimensions match; otherwise a fixed Linear map ``W`` is used (PCA
    projection when stepping down, zero-padded identity when stepping up),
    and both ``X`` and ``Z`` are projected through ``W`` so the next layer
    sees inputs of the right dimensionality.  The final layer maps to
    ``num_outputs`` using ``mean_function``.

    Fix: the previous default ``mean_function=Zero()`` instantiated a single
    shared ``Zero()`` object at import time (the evaluated-once default
    argument pitfall); it is now created per call.

    :param X: training inputs, shape [N, layer_sizes[0]].
    :param Y: training targets; only ``Y.shape[1]`` is read (output count).
    :param Z: inducing inputs for the first layer.
    :param kernels: one kernel per layer.
    :param layer_sizes: dimensionality of each layer's input/output.
    :param mean_function: mean function for the final layer (default: Zero()).
    :param num_outputs: number of outputs; defaults to ``Y.shape[1]``.
    :param Layer: layer constructor (default ``SVGPLayer``).
    :param whiten: whether layers use the whitened representation.
    :return: list of constructed layers.
    """
    if mean_function is None:
        mean_function = Zero()
    num_outputs = num_outputs or Y.shape[1]

    layers = []
    X_running, Z_running = X.copy(), Z.copy()
    for in_idx, kern_in in enumerate(kernels[:-1]):
        dim_in = layer_sizes[in_idx]
        dim_out = layer_sizes[in_idx + 1]

        # Initialize mean function to be either Identity or a fixed projection.
        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:
                # Stepping down: PCA projection — eigenvectors corresponding
                # to the dim_out largest eigenvalues.
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:
                # Stepping up: identity plus zero padding.
                W = np.concatenate(
                    [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            # The projection is fixed, not learned.
            gpflow.set_trainable(mf.A, False)
            gpflow.set_trainable(mf.b, False)

        layers.append(Layer(kern_in, Z_running, dim_out, mf, white=whiten))

        if dim_in != dim_out:
            # Propagate data and inducing inputs into the next layer's space.
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # Final layer.
    layers.append(Layer(kernels[-1], Z_running, num_outputs, mean_function,
                        white=whiten))
    return layers
def test_multiclass():
    """Smoke-test Scipy optimisation of a 3-class SVGP compiled against
    fully unknown input shapes."""
    n_classes = 3
    model = gpflow.models.SVGP(
        gpflow.kernels.SquaredExponential(),
        gpflow.likelihoods.MultiClass(num_classes=n_classes),
        inducing_variable=Datum.X.copy(),
        num_latent_gps=n_classes,
    )
    gpflow.set_trainable(model.inducing_variable, False)

    # Compile the ELBO with explicitly unknown shapes.
    spec = tf.TensorSpec(shape=None, dtype=default_float())
    compiled_elbo = tf.function(model.elbo, input_signature=[(spec, spec)])

    @tf.function
    def objective():
        return -compiled_elbo(Datum.cdata)

    # Simply verify the optimisation runs without raising.
    gpflow.optimizers.Scipy().minimize(
        objective,
        variables=model.trainable_variables,
        options=dict(maxiter=3),
        compile=True,
    )
def _init_layers(self, X, Y, Z, q_sqrt_initial, kernels, mean_function=Zero(),
                 Layer=SVGPLayer, white=False):
    """Build one layer per output dimension, chaining layer outputs.

    Layer ``i`` models output_i given the original inputs plus the (mean of
    the) variational posteriors ``q_mu`` of all previously built layers:
    after each layer is created, its ``q_mu`` is concatenated onto the
    running inducing-input matrix.

    :param X: training inputs; only ``X.shape[1]`` (input count) is read.
    :param Y: training targets; only ``Y.shape[1]`` (output count) is read.
    :param Z: inducing matrix; columns [:num_inputs] are the shared input
        inducing locations (frozen below), columns [num_inputs + i] seed
        layer i.  # assumes Z has num_inputs + num_outputs columns — TODO confirm
    :param q_sqrt_initial: per-output initial q_sqrt; column i goes to layer i.
    :param kernels: one kernel per output dimension.
    :param mean_function: shared mean function for every layer.
        # NOTE(review): default Zero() is evaluated once at import time and
        # shared across calls — confirm this is intentional.
    :param Layer: layer constructor (default SVGPLayer).
    :param white: whether layers use the whitened representation.
    :return: list of constructed layers.
    """
    layers = []
    num_inputs = X.shape[1]
    num_outputs = Y.shape[1]
    # Shared input inducing points, frozen so all layers see the same locations.
    self.inducing_inputs = inducingpoint_wrapper(Z[:, :num_inputs])
    gpflow.set_trainable(self.inducing_inputs, False)
    inducing_inputs = self.inducing_inputs.Z
    for i in range(num_outputs):
        layer = Layer(kernels[i], inducing_inputs, Z[:, num_inputs + i],
                      q_sqrt_initial[:, i], mean_function, white=white)
        layers.append(layer)
        # Grow the inducing inputs with this layer's variational mean so the
        # next output is conditioned on the previous ones.
        inducing_inputs = tf.concat([inducing_inputs, layer.q_mu], axis=1)
    return layers
def build_model(data):
    """
    Create a ``GaussianProcessRegression`` wrapping a GPR with an RBF kernel.

    The kernel variance is initialised from the empirical variance of the
    observations; the observation-noise variance is fixed (untrainable)
    at 1e-5.
    """
    empirical_variance = tf.math.reduce_variance(data.observations)
    rbf = gpflow.kernels.RBF(variance=empirical_variance, lengthscales=[2, 2])
    model = gpflow.models.GPR(data.astuple(), rbf, noise_variance=1e-5)
    gpflow.set_trainable(model.likelihood, False)
    return GaussianProcessRegression(model)
def model_and_loss(data) -> Tuple[tf.keras.models.Model, tf.keras.losses.Loss]:
    """
    Build a two-layer deep GP as a Keras model, plus its likelihood loss.

    The Gaussian likelihood variance is fixed (untrainable); the likelihood
    itself is attached to the model through a TrackableLayer so its
    variables are still tracked by Keras.
    """
    X, Y = data
    num_data, input_dim = X.shape

    hidden_layer = construct_gp_layer(
        num_data, CONFIG.num_inducing, input_dim, CONFIG.hidden_dim, name="gp0"
    )
    output_dim = Y.shape[-1]
    output_layer = construct_gp_layer(
        num_data, CONFIG.num_inducing, CONFIG.hidden_dim, output_dim, name="gp1"
    )

    likelihood = gpflow.likelihoods.Gaussian(CONFIG.likelihood_variance)
    gpflow.set_trainable(likelihood.variance, False)

    inputs = tf.keras.Input((input_dim,))
    hidden = hidden_layer(inputs)
    latent = output_layer(hidden)
    # Dummy container layer so the likelihood's variables are tracked by Keras.
    likelihood_container = gpflux.layers.TrackableLayer()
    likelihood_container.likelihood = likelihood
    outputs = likelihood_container(latent)

    loss = gpflux.losses.LikelihoodLoss(likelihood)
    return tf.keras.Model(inputs=inputs, outputs=outputs), loss
def create_regression_model(data):
    """GPR with a Matern-5/2 kernel seeded from the data's empirical
    variance; the (tiny) likelihood noise is held fixed."""
    obs_variance = tf.math.reduce_variance(data.observations)
    matern = gpflow.kernels.Matern52(variance=obs_variance, lengthscales=[0.2, 0.2])
    model = gpflow.models.GPR(data.astuple(), matern, noise_variance=1e-5)
    gpflow.set_trainable(model.likelihood, False)
    return model
def get_covariance_function():
    """
    Assemble the CO2 covariance: (periodic * damping) + Matern-3/2 trend.

    Gaussian priors are attached to most hyperparameters; the periodic base
    variance and the period itself are held fixed (untrainable).
    """
    dtype = gpf.config.default_float()

    # Matern 3/2 trend component.
    trend_cov = Matern32(variance=1, lengthscales=100.)
    trend_cov.variance.prior = Normal(dtype(1.), dtype(0.1))
    trend_cov.lengthscales.prior = Normal(dtype(100.), dtype(50.))

    # Base kernel for the periodic component; its variance is frozen.
    base_cov = SquaredExponential(variance=5., lengthscales=1.)
    set_trainable(base_cov.variance, False)
    base_cov.lengthscales.prior = Normal(dtype(5.), dtype(1.))

    # Periodic component with a fixed period.
    periodic_cov = Periodic(base_cov, period=1., order=FLAGS.qp_order)
    set_trainable(periodic_cov.period, False)

    # Damping applied to the periodic component.
    damping_cov = Matern32(variance=1e-1, lengthscales=50)
    damping_cov.variance.prior = Normal(dtype(1e-1), dtype(1e-3))
    damping_cov.lengthscales.prior = Normal(dtype(50), dtype(10.))

    # Final covariance.
    return periodic_cov * damping_cov + trend_cov
def train_SGPR(
    model: gpflow.models.SGPR,
    epochs: int,
    optimizer: Optional[tf.optimizers.Optimizer] = None,
    logging_epoch_freq: int = 10,
    epoch_var: Optional[tf.Variable] = None,
):
    """
    Training loop for a sparse GP (SGPR); plots the loss curve at the end.

    :param model: SGPR model to train; its mean function is frozen here.
    :param epochs: number of optimisation steps to run.
    :param optimizer: optimiser instance; defaults to ``Adam(lr=0.1)``.
        NOTE(review): currently unused — ``optimization_exact`` receives only
        the model; the parameter is kept for interface compatibility.
    :param logging_epoch_freq: print the training loss every N epochs.
    :param epoch_var: optional variable assigned the 1-based epoch number
        (useful for checkpoint/monitoring hooks).
    """
    # Fix: the previous signature used ``tf.optimizers.Adam(...)`` as the
    # default argument, constructing one shared optimizer at import time
    # (the evaluated-once default-argument pitfall). Create it lazily.
    if optimizer is None:
        optimizer = tf.optimizers.Adam(learning_rate=0.1)

    set_trainable(model.mean_function, False)
    tf_optimization_step = tf.function(optimization_exact)

    losses = []
    for epoch in range(epochs):
        tf_optimization_step(model)
        epoch_id = epoch + 1
        if epoch_var is not None:
            epoch_var.assign(epoch_id)
        # Evaluate the loss once and reuse it for both logging and the plot.
        current_loss = model.training_loss()
        losses.append(current_loss)
        if epoch_id % logging_epoch_freq == 0:
            tf.print(f"Epoch {epoch_id}: LOSS (train) {current_loss}")

    plt.plot(range(epochs), losses)
    plt.xlabel('Epoch', fontsize=25)
    plt.ylabel('Loss', fontsize=25)
    plt.tight_layout()
def test_svgp(whiten, q_diag):
    """Smoke-test Scipy optimisation of an SVGP compiled against fully
    unknown input shapes, across whiten/q_diag configurations."""
    model = gpflow.models.SVGP(
        gpflow.kernels.SquaredExponential(),
        gpflow.likelihoods.Gaussian(),
        inducing_variable=Datum.X.copy(),
        q_diag=q_diag,
        whiten=whiten,
        mean_function=gpflow.mean_functions.Constant(),
        num_latent_gps=Datum.Y.shape[1],
    )
    gpflow.set_trainable(model.inducing_variable, False)

    # Compile the ELBO with explicitly unknown shapes.
    spec = tf.TensorSpec(shape=None, dtype=default_float())
    compiled_elbo = tf.function(model.elbo, input_signature=[(spec, spec)])

    @tf.function
    def objective():
        return -compiled_elbo(Datum.data)

    # Simply verify the optimisation runs without raising.
    gpflow.optimizers.Scipy().minimize(
        objective,
        variables=model.trainable_variables,
        options=dict(maxiter=3),
        compile=True,
    )
def checkpointing_train_SGPR(
    model: gpflow.models.SGPR,
    X: tf.Tensor,
    Y: tf.Tensor,
    epochs: int,
    manager: tf.train.CheckpointManager,
    optimizer: Optional[tf.optimizers.Optimizer] = None,
    logging_epoch_freq: int = 10,
    epoch_var: Optional[tf.Variable] = None,
    exp_tag: str = 'test',
):
    """
    Training loop for a sparse GP with periodic checkpointing.

    Every ``logging_epoch_freq`` epochs, a checkpoint is saved via ``manager``
    and the training loss and test MSE (on X, Y) are printed; the loss curve
    is plotted at the end.

    :param model: SGPR model to train; its mean function is frozen here.
    :param X: evaluation inputs for the logged MSE.
    :param Y: evaluation targets for the logged MSE.
    :param epochs: number of optimisation steps to run.
    :param manager: checkpoint manager used to save snapshots.
    :param optimizer: optimiser instance; defaults to ``Adam(lr=0.1)``.
        NOTE(review): currently unused — ``optimization_exact`` receives only
        the model; the parameter is kept for interface compatibility.
    :param logging_epoch_freq: checkpoint/log every N epochs.
    :param epoch_var: optional variable assigned the 1-based epoch number.
    :param exp_tag: experiment tag. NOTE(review): unused in this body.
    """
    # Fix: the previous signature used ``tf.optimizers.Adam(...)`` as the
    # default argument, constructing one shared optimizer at import time
    # (the evaluated-once default-argument pitfall). Create it lazily.
    if optimizer is None:
        optimizer = tf.optimizers.Adam(learning_rate=0.1)

    set_trainable(model.mean_function, False)
    tf_optimization_step = tf.function(optimization_exact)

    losses = []
    for epoch in range(epochs):
        tf_optimization_step(model)
        epoch_id = epoch + 1
        if epoch_var is not None:
            epoch_var.assign(epoch_id)
        # Evaluate the loss once and reuse it for logging and the plot.
        current_loss = model.training_loss()
        losses.append(current_loss)
        if epoch_id % logging_epoch_freq == 0:
            ckpt_path = manager.save()
            tf.print(
                f"Epoch {epoch_id}: LOSS (train) {current_loss}, saved at {ckpt_path}"
            )
            tf.print(f"MSE: {mean_squared_error(Y, model.predict_y(X)[0])}")

    plt.plot(range(epochs), losses)
    plt.xlabel('Epoch', fontsize=25)
    plt.ylabel('Loss', fontsize=25)
    plt.tight_layout()
def _gp_train(self, x, y):
    """Fit (or refit) the internal VGP model on the dataset ``(x, y)``.

    On the first call a gpflow VGP is constructed from the configured kernel,
    mean function and likelihood; on later calls only the model's ``data``
    attribute is reassigned.  Training alternates natural-gradient steps on
    the variational parameters ``(q_mu, q_sqrt)`` with steps of
    ``self.optimiser`` on the remaining trainable variables.

    :param x: inputs, 2-D array of shape [N, D].
    :param y: targets, 2-D array with matching N (num_latent_gps=1).
    """
    assert x.shape[0] == y.shape[0]
    assert x.ndim == 2 and y.ndim == 2
    if self.gpflow_model is None:
        # First call: build the variational GP model.
        self.gpflow_model = gpflow.models.VGP(
            data=(x, y),
            kernel=self.gp_kernel,
            mean_function=self.gp_meanf,
            likelihood=self.likelihood,
            num_latent_gps=1,
        )
    else:
        # Subsequent calls: swap in the new data only.
        # NOTE(review): VGP's variational parameters are sized by N at
        # construction — this assumes the number of points does not change
        # between calls; confirm with the caller.
        self.gpflow_model.data = (x, y)
    # q_mu/q_sqrt are updated by natural gradients only, so exclude them
    # from the generic optimiser's trainable variables.
    gpflow.set_trainable(self.gpflow_model.q_mu, False)
    gpflow.set_trainable(self.gpflow_model.q_sqrt, False)
    for i in range(self.train_iters):
        self.natgrad_optimiser.minimize(
            self.gpflow_model.training_loss,
            [(self.gpflow_model.q_mu, self.gpflow_model.q_sqrt)],
        )
        self.optimiser.minimize(
            self.gpflow_model.training_loss,
            self.gpflow_model.trainable_variables,
        )
        logging.debug(
            f"VGP iteration {i+1}. ELBO: {self.gpflow_model.elbo():.04f}")
def build_gp_model(data, x_std=1.0, y_std=0.1):
    """
    Build a ``GaussianProcessRegression`` with scale-aware priors.

    The Matern-5/2 kernel is initialised from the empirical variance of the
    observations; LogNormal priors are centred on ``y_std`` (variance) and on
    the initial lengthscales. The likelihood noise is fixed at 1e-5.

    :param data: dataset exposing ``query_points`` and ``observations``.
    :param x_std: assumed input scale, used to seed the lengthscales.
    :param y_std: assumed output scale, used to centre the variance prior.
    """
    dim = data.query_points.shape[-1]
    empirical_variance = tf.math.reduce_variance(data.observations)
    prior_lengthscales = [0.2 * x_std * np.sqrt(dim)] * dim

    prior_scale = tf.cast(1.0, dtype=tf.float64)
    x_std = tf.cast(x_std, dtype=tf.float64)  # cast kept for parity; unused below
    y_std = tf.cast(y_std, dtype=tf.float64)

    kernel = gpflow.kernels.Matern52(
        variance=empirical_variance,
        lengthscales=prior_lengthscales,
    )
    kernel.variance.prior = tfp.distributions.LogNormal(
        tf.math.log(y_std), prior_scale)
    kernel.lengthscales.prior = tfp.distributions.LogNormal(
        tf.math.log(kernel.lengthscales), prior_scale)

    gpr = gpflow.models.GPR(
        data.astuple(),
        kernel,
        mean_function=gpflow.mean_functions.Constant(),
        noise_variance=1e-5,
    )
    gpflow.set_trainable(gpr.likelihood, False)

    return GaussianProcessRegression(
        model=gpr,
        optimizer=Optimizer(
            gpflow.optimizers.Scipy(),
            minimize_args={"options": dict(maxiter=100)},
        ),
        num_kernel_samples=100,
    )
def make_kernel_likelihood_iv():
    """Return a squared-exponential kernel, a Gaussian likelihood and 20
    frozen inducing points evenly spaced on [0, 6]."""
    kernel = gpflow.kernels.SquaredExponential(variance=0.7, lengthscales=0.6)
    likelihood = gpflow.likelihoods.Gaussian(variance=0.08)
    locations = np.linspace(0, 6, 20)[:, np.newaxis]
    iv = gpflow.inducing_variables.InducingPoints(locations)
    gpflow.set_trainable(iv, False)
    return kernel, likelihood, iv
def create_classification_model(data):
    """VGP binary classifier with a fixed-variance squared-exponential kernel."""
    kernel = gpflow.kernels.SquaredExponential(
        variance=100.0, lengthscales=[0.2, 0.2]
    )
    model = gpflow.models.VGP(
        data.astuple(), kernel, gpflow.likelihoods.Bernoulli()
    )
    # Keep the (large) signal variance fixed during training.
    gpflow.set_trainable(model.kernel.variance, False)
    return model
def optimize(self):
    """Run 100 alternating steps of natural-gradient updates (variational
    parameters) and Adam updates (remaining trainable variables) on the
    model's training loss."""
    # Exclude q_mu/q_sqrt from Adam; natural gradients handle them below.
    set_trainable(self.model.q_mu, False)
    set_trainable(self.model.q_sqrt, False)
    var_params = [(self.model.q_mu, self.model.q_sqrt)]
    hyper_opt = tf.optimizers.Adam(1e-3)
    nat_opt = NaturalGradient(gamma=0.1)
    for _ in range(100):
        nat_opt.minimize(self.model.training_loss, var_list=var_params)
        hyper_opt.minimize(self.model.training_loss,
                           var_list=self.model.trainable_variables)
def optimize(self, dataset):
    """Run 50 alternating natural-gradient / Adam steps on the model's
    training loss.

    NOTE(review): ``dataset`` is not used here — the model appears to hold
    its own data; confirm the parameter is required by the caller's interface.
    """
    # Exclude q_mu/q_sqrt from Adam; natural gradients handle them below.
    gpflow.set_trainable(self.model.q_mu, False)
    gpflow.set_trainable(self.model.q_sqrt, False)
    var_params = [(self.model.q_mu, self.model.q_sqrt)]
    hyper_opt = tf.optimizers.Adam(1e-3)
    nat_opt = gpflow.optimizers.NaturalGradient(gamma=0.1)
    objective = self.model.training_loss
    for _ in range(50):
        nat_opt.minimize(objective, var_params)
        hyper_opt.minimize(objective, self.model.trainable_variables)
def optimize_policy(self, maxiter=50, restarts=1):
    '''Optimize the controller's parameters.

    The dynamics-model (mgpr) parameters are temporarily frozen so only the
    controller is optimised, and are re-enabled at the end.  With
    ``restarts > 1`` the controller is re-randomised and re-optimised, and
    the parameter values achieving the best reward are restored.

    :param maxiter: maximum iterations per Scipy minimize call.
    :param restarts: total number of optimisation runs (the first run counts
        as one restart).
    '''
    start = time.time()
    # Freeze the dynamics model: only controller parameters stay trainable.
    mgpr_trainable_params = self.mgpr.trainable_parameters
    for param in mgpr_trainable_params:
        set_trainable(param, False)
    if not self.optimizer:
        # Lazily create the optimizer on first use.
        self.optimizer = gpflow.optimizers.Scipy()
        self.optimizer.minimize(self.training_loss, self.trainable_variables,
                                options=dict(maxiter=maxiter))
        # self.optimizer = tf.optimizers.Adam()
        # self.optimizer.minimize(self.training_loss, self.trainable_variables)
    else:
        self.optimizer.minimize(self.training_loss, self.trainable_variables,
                                options=dict(maxiter=maxiter))
        # self.optimizer.minimize(self.training_loss, self.trainable_variables)
    end = time.time()
    print(
        "Controller's optimization: done in %.1f seconds with reward=%.3f."
        % (end - start, self.compute_reward()))
    # The first optimisation above consumes one "restart".
    restarts -= 1

    best_parameter_values = [
        param.numpy() for param in self.trainable_parameters
    ]
    best_reward = self.compute_reward()
    for restart in range(restarts):
        # Re-randomise the controller and optimise again from scratch.
        self.controller.randomize()
        start = time.time()
        self.optimizer.minimize(self.training_loss, self.trainable_variables,
                                options=dict(maxiter=maxiter))
        # self.optimizer.minimize(self.training_loss, self.trainable_variables)
        end = time.time()
        reward = self.compute_reward()
        print(
            "Controller's optimization: done in %.1f seconds with reward=%.3f."
            % (end - start, self.compute_reward()))
        if reward > best_reward:
            best_parameter_values = [
                param.numpy() for param in self.trainable_parameters
            ]
            best_reward = reward

    # Restore the best controller parameters found across all restarts.
    for i, param in enumerate(self.trainable_parameters):
        param.assign(best_parameter_values[i])
    end = time.time()
    # Unfreeze the dynamics model.
    for param in mgpr_trainable_params:
        set_trainable(param, True)
def build_model(data, kernel_func=None):
    """
    Build a ``GaussianProcessRegression`` on ``data``.

    :param data: dataset exposing ``observations`` and ``astuple()``.
    :param kernel_func: optional callable taking the empirical variance as
        its single argument and returning a kernel; defaults to a
        Matern-5/2 seeded with that variance.
    """
    variance = tf.math.reduce_variance(data.observations)
    if kernel_func is not None:
        kernel = kernel_func(variance)
    else:
        kernel = gpflow.kernels.Matern52(variance=variance)
    model = gpflow.models.GPR(data.astuple(), kernel, noise_variance=1e-5)
    gpflow.set_trainable(model.likelihood, False)
    return GaussianProcessRegression(model)
def __init__(self, state_dim, control_dim, num_basis_functions, max_action=1.0):
    """RBF-network controller built on an MGPR over random basis centres.

    :param state_dim: dimensionality of the state input.
    :param control_dim: dimensionality of the control output.
    :param num_basis_functions: number of random basis centres.
    :param max_action: action magnitude bound stored for later use.
    """
    MGPR.__init__(self, [
        np.random.randn(num_basis_functions, state_dim),
        0.1 * np.random.randn(num_basis_functions, control_dim),
    ])
    # Pin each model's kernel variance at 1 (untrainable).
    for basis_model in self.models:
        basis_model.kernel.variance.assign(1.0)
        set_trainable(basis_model.kernel.variance, False)
    self.max_action = max_action
def build_model(data):
    """Return an ``OBJECTIVE`` -> model-spec dict for a Matern-5/2 GPR with
    fixed (1e-5) likelihood noise, optimised with Scipy."""
    variance = tf.math.reduce_variance(data.observations)
    kernel = gpflow.kernels.Matern52(variance=variance, lengthscales=[0.2, 0.2])
    gpr = gpflow.models.GPR(data.astuple(), kernel, noise_variance=1e-5)
    gpflow.set_trainable(gpr.likelihood, False)
    spec = {
        "model": gpr,
        "optimizer": gpflow.optimizers.Scipy(),
        "optimizer_args": {
            "minimize_args": {"options": dict(maxiter=100)},
        },
    }
    return {OBJECTIVE: spec}
def fit(self, X: np.ndarray, Y: np.ndarray, partition_type: str = "random"):
    """
    Initialise the individual experts and fit their shared hyperparameters
    by minimising the sum of negative ELBOs.

    Fix: the previous body referenced an undefined name ``partition_type``
    (a guaranteed ``NameError``); it is now a keyword argument with a
    default, stored on the instance as before.

    :param X: training inputs, shape [n_train_points, dim_x].
    :param Y: training labels, shape [n_train_points, 1].
    :param partition_type: label describing how the data is partitioned.
        NOTE(review): only stored; the partition below is always random.
    """
    self.ind = 100  # number of inducing points per expert
    self.X = X
    self.Y = Y
    # Number of experts (at least one).
    self.M = int(max(X.shape[0] / self.points_per_experts, 1))
    self.partition_type = partition_type
    self.N = int(X.shape[0] / self.M)
    # Random disjoint assignment of training points to experts.
    self.partition = np.random.choice(
        X.shape[0], size=(self.M, self.N), replace=False)

    # Shared kernel/likelihood across all experts.
    lengthscales = tf.convert_to_tensor(
        [1.0] * self.X.shape[1], dtype=default_float())
    self.kern = gpflow.kernels.RBF(lengthscales=lengthscales)
    self.invlink = gpflow.likelihoods.RobustMax(self.C)
    self.likelihood = gpflow.likelihoods.MultiClass(self.C, invlink=self.invlink)

    # One inducing-point initialisation per expert, from its own shard.
    ivs = []
    for i in range(self.M):
        Z = ConditionalVariance().compute_initialisation(
            np.array(X[self.partition[i]].copy()), self.ind, self.kern)[0]
        ivs.append(tf.convert_to_tensor(Z))

    self.experts = [
        gpflow.models.SVGP(
            kernel=self.kern,
            likelihood=self.likelihood,
            num_latent_gps=self.C,
            inducing_variable=iv,
        )
        for iv in ivs
    ]
    for expert in self.experts:
        gpflow.set_trainable(expert.inducing_variable, True)

    self.opt = tf.keras.optimizers.Adam(learning_rate=0.05)
    self.optimize()
def create_bo_model(data):
    """
    Wrap a Matern-5/2 (+ tiny White jitter) GPR in a Trieste model spec.

    Fix: the kernel variance was previously seeded from the global
    ``initial_data[OBJECTIVE].observations`` rather than the ``data``
    argument — the function ignored its own input. It now uses
    ``data.observations``.
    """
    variance = tf.math.reduce_variance(data.observations)
    lengthscale = 1.0 * np.ones(2, dtype=gpflow.default_float())
    kernel = gpflow.kernels.Matern52(variance=variance, lengthscales=lengthscale)
    # Tiny White kernel purely for numerical jitter.
    jitter = gpflow.kernels.White(1e-12)
    gpr = gpflow.models.GPR(data.astuple(), kernel + jitter, noise_variance=1e-5)
    gpflow.set_trainable(gpr.likelihood, False)
    return trieste.models.create_model({
        "model": gpr,
        "optimizer": gpflow.optimizers.Scipy(),
        "optimizer_args": {
            "minimize_args": {"options": dict(maxiter=100)},
        },
    })
def test_mixed_mok_with_Id_vs_independent_mok(): data = DataMixedKernelWithEye # Independent model k1 = mk.SharedIndependent(SquaredExponential(variance=0.5, lengthscales=1.2), data.L) f1 = InducingPoints(data.X[: data.M, ...]) model_1 = SVGP(k1, Gaussian(), f1, q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full) set_trainable(model_1, False) set_trainable(model_1.q_sqrt, True) gpflow.optimizers.Scipy().minimize( model_1.training_loss_closure(Data.data), variables=model_1.trainable_variables, method="BFGS", compile=True, ) # Mixed Model kern_list = [SquaredExponential(variance=0.5, lengthscales=1.2) for _ in range(data.L)] k2 = mk.LinearCoregionalization(kern_list, data.W) f2 = InducingPoints(data.X[: data.M, ...]) model_2 = SVGP(k2, Gaussian(), f2, q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full) set_trainable(model_2, False) set_trainable(model_2.q_sqrt, True) gpflow.optimizers.Scipy().minimize( model_2.training_loss_closure(Data.data), variables=model_2.trainable_variables, method="BFGS", compile=True, ) check_equality_predictions(Data.data, [model_1, model_2])
def __init__(self, data, kernel, X=None, likelihood_variance=1e-4):
    """Gaussian-likelihood GP module holding its data as Parameters.

    :param data: (X, Y) pair; ``data[0]`` is used only when ``X`` is None.
    :param kernel: covariance function to store on the module.
    :param X: optional pre-built input Parameter/tensor to share instead of
        wrapping ``data[0]``.
    :param likelihood_variance: fixed (untrainable) observation-noise value.
    """
    gpflow.Module.__init__(self)
    self.X = (
        Parameter(data[0], name="DataX", dtype=gpflow.default_float())
        if X is None
        else X
    )
    self.Y = Parameter(data[1], name="DataY", dtype=gpflow.default_float())
    self.data = [self.X, self.Y]
    self.kernel = kernel
    self.likelihood = gpflow.likelihoods.Gaussian()
    # Pin the noise level to the requested value.
    self.likelihood.variance.assign(likelihood_variance)
    set_trainable(self.likelihood.variance, False)
def fit_natgrad(model, data, maxiter, adam_learning_rate=0.01, gamma=1.0):
    """Train a model with combined natural-gradient (variational parameters)
    and Adam (hyperparameters) steps, computed in a single gradient pass.

    Works for a gpflow SVGP or for a model exposing a single layer via
    ``model.f_layers`` with (q_mu, q_sqrt).

    :param model: model exposing ``elbo(data)`` and variational parameters.
    :param data: dataset passed to the ELBO.
    :param maxiter: number of optimization steps.
    :param adam_learning_rate: Adam step size for the hyperparameters.
    :param gamma: natural-gradient step size.
    """
    if isinstance(model, gpflow.models.SVGP):
        variational_params = [(model.q_mu, model.q_sqrt)]
    else:
        # Single-layer model: unpack exactly one layer.
        [layer] = model.f_layers
        variational_params = [(layer.q_mu, layer.q_sqrt)]
    # Freeze the variational parameters for Adam and collect their
    # unconstrained variables so the tape can still watch them.
    variational_params_vars = []
    for param_list in variational_params:
        these_vars = []
        for param in param_list:
            gpflow.set_trainable(param, False)
            these_vars.append(param.unconstrained_variable)
        variational_params_vars.append(these_vars)
    # Everything still trainable after the freeze goes to Adam.
    hyperparam_variables = model.trainable_variables
    num_data = get_num_data(data)

    @tf.function
    def training_loss():
        # Per-datapoint negative ELBO.
        return -model.elbo(data) / num_data

    natgrad = gpflow.optimizers.NaturalGradient(gamma=gamma)
    adam = tf.optimizers.Adam(adam_learning_rate)

    @tf.function
    def optimization_step():
        """
        NOTE: In GPflow, we would normally do alternating ascent:

        >>> natgrad.minimize(training_loss, var_list=variational_params)
        >>> adam.minimize(training_loss, var_list=hyperparam_variables)

        This, however, does not match up with the single pass we require for
        Keras's model.compile()/fit(). Hence we manually re-create the same
        optimization step.
        """
        with tf.GradientTape() as tape:
            # The variational variables are not trainable, so watch them
            # explicitly.
            tape.watch(variational_params_vars)
            loss = training_loss()
        variational_grads, other_grads = tape.gradient(
            loss, (variational_params_vars, hyperparam_variables))
        # NOTE(review): uses the private _natgrad_apply_gradients API — may
        # break across gpflow versions; confirm the pinned version supports it.
        for (q_mu_grad, q_sqrt_grad), (q_mu, q_sqrt) in zip(variational_grads, variational_params):
            natgrad._natgrad_apply_gradients(q_mu_grad, q_sqrt_grad, q_mu, q_sqrt)
        adam.apply_gradients(zip(other_grads, hyperparam_variables))

    for i in range(maxiter):
        optimization_step()
def __init__(
    self,
    data: Tuple[tf.Tensor, tf.Tensor],
    reduced_kernel: gpflow.kernels.Kernel,
    observed_kernel: gpflow.kernels.Kernel,
    mean_function: Optional[gpflow.mean_functions.MeanFunction] = None,
    noise_variance: float = 1.0,
    jitter: float = 1e-8,
):
    """Two-axis GP model over data ``(W, Y)`` with one kernel per axis.

    :param data: tuple ``(W, Y)``; W has shape [N, R] (reduced axis) and
        Y has shape [N, Q] (observed axis).
    :param reduced_kernel: kernel along the reduced (absorption) axis.
    :param observed_kernel: kernel along the observed (fluorescence) axis.
    :param mean_function: optional prior mean; Zero() when omitted.
    :param noise_variance: initial Gaussian likelihood variance.
    :param jitter: numerical jitter; also used as a fixed White kernel
        during prediction.
    """
    super().__init__()
    self.likelihood = gpflow.likelihoods.Gaussian(noise_variance)
    self.reduced_kernel = reduced_kernel  # kernel for the absorption axis
    self.observed_kernel = observed_kernel  # kernel for the fluorescence axis
    # Jitter expressed as a White kernel; needed in prediction only, so its
    # variance stays fixed.
    self._pred_jitter_kernel = gpflow.kernels.White(
        variance=jitter)  # needed in prediction only
    gpflow.set_trainable(self._pred_jitter_kernel.variance, False)
    W, Y = data
    R = W.shape[-1]  # the reduced axis length
    Q = Y.shape[-1]  # the observed axis length
    # Normalised coordinates in [-1, 1] along each axis.
    raxis = np.linspace(-1.0, 1.0, R)[:, None]  # reduced axis
    qaxis = np.linspace(-1.0, 1.0, Q)[:, None]  # observed axis
    self.raxis = tf.convert_to_tensor(raxis, dtype=gpflow.default_float())
    self.qaxis = tf.convert_to_tensor(qaxis, dtype=gpflow.default_float())
    # full_axis = 2D axis ordered to match kron(reduced_axis, observed_axis)
    self.full_axis = cartesian_prod(self.raxis, self.qaxis)
    self.data = tuple([
        tf.convert_to_tensor(d, dtype=gpflow.config.default_float())
        for d in data
    ])
    # reassign for convenience now that we're in tensorflow mode
    W, Y = self.data
    # we need to store W for prediction, but not training
    self._W = W
    # make some cached quantities reused by the likelihood/prediction code
    self._WTW = tf.matmul(W, W, transpose_a=True)
    self._YTW = tf.matmul(Y, W, transpose_a=True)
    self._yTy = tf.reduce_sum(Y * Y)  # equivalent to fvec(Y).T @ fvec(Y)
    self._N = tf.cast(tf.shape(W)[0], dtype=gpflow.default_float())
    self._R = tf.cast(tf.shape(W)[-1], dtype=gpflow.default_float())
    self._Q = tf.cast(tf.shape(Y)[-1], dtype=gpflow.default_float())
    if mean_function is None:
        # zpm: presumably "zero prior mean" — flags the default-mean path
        # for downstream code; TODO confirm.
        self.prior_mean_func = gpflow.mean_functions.Zero()
        self.zpm = True
    else:
        self.prior_mean_func = mean_function
        self.zpm = False
    self.jitter = tf.cast(jitter, dtype=gpflow.default_float())
    self.log2pi = tf.cast(np.log(np.pi * 2), dtype=gpflow.default_float())
def test_svgp_fixing_q_sqrt():
    """
    Regression test for bug #46: freezing q_sqrt must remove exactly one
    entry from the model's trainable variables.
    """
    num_latent_gps = default_datum_svgp.Y.shape[1]
    model = gpflow.models.SVGP(
        kernel=gpflow.kernels.SquaredExponential(),
        likelihood=default_datum_svgp.lik,
        q_diag=True,
        num_latent_gps=num_latent_gps,
        inducing_variable=default_datum_svgp.Z,
        whiten=False,
    )
    n_trainable_before = len(model.trainable_variables)
    set_trainable(model.q_sqrt, False)
    assert len(model.trainable_variables) == n_trainable_before - 1
def test_non_trainable_model_objective():
    """
    A model with no trainable parameters must still evaluate its objective,
    and — having no priors — must report a zero log prior density
    (regression test for a bug in log_prior()).
    """
    model = gpflow.models.GPR(
        (Data.X, Data.Y),
        kernel=gpflow.kernels.SquaredExponential(
            lengthscales=Data.ls, variance=Data.var
        ),
    )
    set_trainable(model, False)
    _ = model.log_marginal_likelihood()
    assert model.log_prior_density() == 0.0