def test_gaussian_process_deep_copyable(gpr_interface_factory: ModelFactoryType) -> None:
    x = tf.constant(np.arange(5).reshape(-1, 1), dtype=gpflow.default_float())
    model, _ = gpr_interface_factory(x, fnc_2sin_x_over_3(x))
    model_copy = copy.deepcopy(model)
    x_predict = tf.constant([[50.5]], gpflow.default_float())

    # check deepcopy predicts same values as original
    mean_f, variance_f = model.predict(x_predict)
    mean_f_copy, variance_f_copy = model_copy.predict(x_predict)
    npt.assert_equal(mean_f, mean_f_copy)
    npt.assert_equal(variance_f, variance_f_copy)

    # check that updating the original doesn't break or change the deepcopy
    x_new = tf.concat([x, tf.constant([[10.0], [11.0]], dtype=gpflow.default_float())], 0)
    new_data = Dataset(x_new, fnc_2sin_x_over_3(x_new))
    model.update(new_data)
    model.optimize(new_data)

    mean_f_updated, variance_f_updated = model.predict(x_predict)
    mean_f_copy_updated, variance_f_copy_updated = model_copy.predict(x_predict)
    npt.assert_equal(mean_f_copy_updated, mean_f_copy)
    npt.assert_equal(variance_f_copy_updated, variance_f_copy)
    npt.assert_array_compare(operator.__ne__, mean_f_updated, mean_f)
    npt.assert_array_compare(operator.__ne__, variance_f_updated, variance_f)
def load_quadcopter_dataset(filename, standardise=False):
    data = np.load(filename)
    X = data['x']
    Y = data['y'][:, 0:2]
    # Y = data['y'][:, 0:1]
    # Y = data['y'][:, 0:3]
    print("Input data shape: ", X.shape)
    print("Output data shape: ", Y.shape)

    # remove some data points
    def trim_dataset(X, Y, x1_low, x2_low, x1_high, x2_high):
        mask_0 = X[:, 0] < x1_low
        mask_1 = X[:, 1] < x2_low
        mask_2 = X[:, 0] > x1_high
        mask_3 = X[:, 1] > x2_high
        mask = mask_0 | mask_1 | mask_2 | mask_3
        X_partial = X[mask, :]
        Y_partial = Y[mask, :]
        return X_partial, Y_partial

    X, Y = trim_dataset(X, Y, x1_low=-1., x2_low=-1., x1_high=1., x2_high=3.)
    X = tf.convert_to_tensor(X, dtype=default_float())
    Y = tf.convert_to_tensor(Y, dtype=default_float())
    print("Trimmed input data shape: ", X.shape)
    print("Trimmed output data shape: ", Y.shape)

    # standardise inputs and outputs
    mean_x, var_x = tf.nn.moments(X, axes=[0])
    mean_y, var_y = tf.nn.moments(Y, axes=[0])
    X = (X - mean_x) / tf.sqrt(var_x)
    Y = (Y - mean_y) / tf.sqrt(var_y)
    data = (X, Y)
    return data
def make_matrix(X, BP, eZ0, epsilon=1e-6):
    """Compute pZ, an N by 3N matrix of prior assignments.
    This code has to be consistent with assigngp_dense.InitialiseVariationalPhi
    as to where the equality is placed, i.e. if x <= b trunk and if x > b branch,
    or vice versa. We use the former convention."""
    num_columns = 3 * tf.shape(X)[0]  # for 3 latent fns
    rows = []
    count = tf.zeros((1,), dtype=tf.int32)
    for x in X:
        # compute how many functions x may belong to
        # needs generalizing for more BPs
        n = tf.cast(tf.greater(x, BP), tf.int32) + 1
        # n == 1 when x <= BP
        # n == 2 when x > BP
        row = [
            tf.zeros(count + n - 1, dtype=gpflow.default_float()) + epsilon
        ]  # all entries until count are zero
        # add 1's for possible entries
        probs = tf.ones(n, dtype=gpflow.default_float())
        row.append(probs)
        row.append(
            tf.zeros(2 - 2 * (n - 1), dtype=gpflow.default_float()) + epsilon
        )  # append zero
        count += 3
        row.append(
            tf.zeros(num_columns - count, dtype=gpflow.default_float()) + epsilon
        )  # ensure things are correctly shaped
        row = tf.concat(row, 0, name="singleconcat")
        row = tf.expand_dims(row, 0)
        rows.append(row)
    return tf.multiply(tf.concat(rows, 0, name="multiconcat"), eZ0)
def __init__(self, num_data: int, latent_dim: int, means: Optional[np.ndarray] = None):
    """
    Directly parameterise the posterior of the latent variables associated with
    each datapoint with a diagonal multivariate Normal distribution. Note that
    across latent variables we assume a mean-field approximation.

    See :cite:t:`dutordoir2018cde` for a more thorough explanation of
    latent variable models and encoders.

    :param num_data: The number of datapoints, ``N``.
    :param latent_dim: The dimensionality of the latent variable, ``W``.
    :param means: The initialisation of the mean of the latent variable posterior
        distribution (see :attr:`means`). If `None` (the default setting), set to
        ``np.random.randn(N, W) * 0.01``; otherwise, ``means`` should be an array
        of rank two with the shape ``[N, W]``.
    """
    super().__init__()
    if means is None:
        # break the symmetry in the means:
        means = 0.01 * np.random.randn(num_data, latent_dim)
    else:
        if np.any(means.shape != (num_data, latent_dim)):
            raise EncoderInitializationError(
                f"means must have shape [num_data, latent_dim] = [{num_data}, {latent_dim}]; "
                f"got {means.shape} instead."
            )

    # initialise distribution with a small standard deviation, as this has
    # been observed to help fitting:
    stds = 1e-5 * np.ones_like(means)

    # TODO: Rename to `scale` and `loc` to match tfp.distributions
    self.means = Parameter(means, dtype=default_float(), name="w_means")
    self.stds = Parameter(stds, transform=positive(), dtype=default_float(), name="w_stds")
def __init__(self, a, b, M):
    # [a, b] defining the interval of the Fourier representation:
    self.a = gpflow.Parameter(a, dtype=gpflow.default_float())
    self.b = gpflow.Parameter(b, dtype=gpflow.default_float())
    # integer array defining the frequencies, ω_m = 2π m / (b - a):
    self.ms = np.arange(M)
    self.omegas = 2.0 * np.pi * self.ms / (b - a)
def predict_f(self, Xnew, full_cov=False):
    M = tf.shape(self.X)[0]
    K = self.kernel.K(self.X)
    Phi = tf.nn.softmax(self.logPhi)
    # try squashing Phi to avoid numerical errors
    Phi = (1 - 2e-6) * Phi + 1e-6
    sigma2 = self.likelihood.variance
    L = (tf.linalg.cholesky(K) +
         tf.eye(M, dtype=gpflow.default_float()) * gpflow.default_jitter())
    W = tf.transpose(L) * tf.sqrt(tf.math.reduce_sum(Phi, 0)) / tf.sqrt(sigma2)
    P = tf.linalg.matmul(W, tf.transpose(W)) + tf.eye(M, dtype=gpflow.default_float())
    R = tf.linalg.cholesky(P)
    PhiY = tf.linalg.matmul(tf.transpose(Phi), self.Y)
    LPhiY = tf.linalg.matmul(tf.transpose(L), PhiY)
    c = tf.linalg.triangular_solve(R, LPhiY, lower=True) / sigma2
    Kus = self.kernel.K(self.X, Xnew)
    tmp1 = tf.linalg.triangular_solve(L, Kus, lower=True)
    tmp2 = tf.linalg.triangular_solve(R, tmp1, lower=True)
    mean = tf.linalg.matmul(tf.transpose(tmp2), c)
    if full_cov:
        var = (self.kernel.K(Xnew) +
               tf.linalg.matmul(tf.transpose(tmp2), tmp2) -
               tf.linalg.matmul(tf.transpose(tmp1), tmp1))
        shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 2), shape)
    else:
        var = (self.kernel.K_diag(Xnew) +
               tf.math.reduce_sum(tf.math.square(tmp2), 0) -
               tf.math.reduce_sum(tf.math.square(tmp1), 0))
        shape = tf.stack([1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 1), shape)
    return mean, var
def test_dgp_sample(two_layer_model: Callable[[TensorType], DeepGP]) -> None:
    x = tf.constant(np.arange(5).reshape(-1, 1), dtype=gpflow.default_float())
    model = DeepGaussianProcess(
        two_layer_model(x),
        optimizer=tf.optimizers.Adam(),
    )
    num_samples = 50
    test_x = tf.constant([[2.5]], dtype=gpflow.default_float())
    samples = model.sample(test_x, num_samples)

    assert samples.shape == [num_samples, 1, 1]

    sample_mean = tf.reduce_mean(samples, axis=0)
    sample_variance = tf.reduce_mean((samples - sample_mean) ** 2)

    reference_model = two_layer_model(x)

    def get_samples(query_points: TensorType, num_samples: int) -> TensorType:
        samples = []
        for _ in range(num_samples):
            samples.append(sample_dgp(reference_model)(query_points))
        return tf.stack(samples)

    ref_samples = get_samples(test_x, num_samples)
    ref_mean = tf.reduce_mean(ref_samples, axis=0)
    ref_variance = tf.reduce_mean((ref_samples - ref_mean) ** 2)

    error = 1 / tf.sqrt(tf.cast(num_samples, tf.float32))
    npt.assert_allclose(sample_mean, ref_mean, atol=2 * error)
    npt.assert_allclose(sample_mean, 0, atol=error)
    npt.assert_allclose(sample_variance, ref_variance, atol=4 * error)
def test_model_stack_missing_predict_y() -> None:
    x = tf.constant(np.arange(5).reshape(-1, 1), dtype=gpflow.default_float())
    model1 = _gpr(x, _3x_plus_10(x))
    model2 = _QuadraticModel([1.0], [2.0])
    stack = ModelStack((model1, 1), (model2, 1))
    x_predict = tf.constant([[0]], gpflow.default_float())
    with pytest.raises(NotImplementedError):
        stack.predict_y(x_predict)
def __init__(
    self,
    kernel,
    freq_axis,
    inducing_variable,
    noise_scale,
    *,
    alpha=None,
    mean_function=None,
    num_latent_gps: int = 1,
    q_diag: bool = False,
    q_mu=None,
    q_sqrt=None,
    whiten: bool = True,
    num_data=None,
):
    """
    - kernel, inducing_variable, mean_function are appropriate GPflow objects
    - noise_scale is the estimated std. dev. of the observational noise (for a Gaussian likelihood)
    - num_latent_gps is the number of latent processes to use, defaults to 1
    - q_diag is a boolean. If True, the covariance is approximated by a diagonal matrix.
    - whiten is a boolean. If True, we use the whitened representation of the inducing points.
    - num_data is the total number of observations, defaults to X.shape[0]
      (relevant when feeding in external minibatches)
    """
    # init the super class, accept args
    likelihood = gpflow.likelihoods.Gaussian(variance=noise_scale ** 2)
    super().__init__(kernel, likelihood, mean_function, num_latent_gps)
    self.num_data = num_data
    self.q_diag = q_diag
    self.whiten = whiten

    # we require an inducing variable of type LaplacianDirichletFeatures
    self.inducing_variable = inducing_variable
    assert type(self.inducing_variable) is LaplacianDirichletFeatures

    self.one_sided_axis = tf.sort(
        tf.convert_to_tensor(freq_axis, dtype=gpflow.default_float()))
    # require axis to be positive frequencies only
    assert tf.math.reduce_all(self.one_sided_axis >= 0)
    zero_axis_flag = self.one_sided_axis[0] <= 1e-6  # treat 0 to 1e-6 as zero
    if zero_axis_flag:
        self._axis_symmetrizer = symmetrize_axis_with_zero
        self._val_symmetrizer = symmetrize_vals_with_zero
    else:
        self._axis_symmetrizer = symmetrize_axis_no_zero
        self._val_symmetrizer = symmetrize_vals_no_zero
    self.axis = self._axis_symmetrizer(self.one_sided_axis)

    if alpha is None:
        self.alpha = tf.reshape(tf.constant(0.1, gpflow.default_float()), (-1))
    else:
        self.alpha = alpha

    # init variational parameters
    num_inducing = self.inducing_variable.num_inducing
    self._init_variational_parameters(num_inducing, q_mu, q_sqrt, q_diag)
def __init__(self, a, b, M, jitter=None):
    self.length = M
    # [a, b] defining the interval of the Fourier representation:
    self.a = gpflow.Parameter(a, dtype=gpflow.default_float())
    self.b = gpflow.Parameter(b, dtype=gpflow.default_float())
    self.jitter = jitter
    self.phis = gpflow.Parameter(np.random.uniform(0, 2 * np.pi, size=M))
    self.omegas = gpflow.Parameter(np.random.uniform(0, 0.5 * M, size=M))
def test_gaussian_process_regression_predict_y(gpflow_interface_factory: ModelFactoryType) -> None:
    x = tf.constant(np.arange(5).reshape(-1, 1), dtype=gpflow.default_float())
    model, _ = gpflow_interface_factory(x, _3x_plus_gaussian_noise(x))
    x_predict = tf.constant([[50.5]], gpflow.default_float())
    mean_f, variance_f = model.predict(x_predict)
    mean_y, variance_y = model.predict_y(x_predict)
    npt.assert_allclose(mean_f, mean_y)
    npt.assert_array_less(variance_f, variance_y)
def test_getset_by_path(path):
    d = (
        tf.random.normal((10, 1), dtype=gpflow.default_float()),
        tf.random.normal((10, 1), dtype=gpflow.default_float()),
    )
    k = gpflow.kernels.RBF() * gpflow.kernels.RBF()
    m = gpflow.models.GPR(d, kernel=k)
    gpflow.utilities.getattr_by_path(m, path)
    gpflow.utilities.setattr_by_path(m, path, None)
def __init__(self, a, b, M):
    """
    `a` and `b` define the interval [a, b] of the Fourier representation.
    `M` specifies the number of frequencies to use.
    """
    # [a, b] defining the interval of the Fourier representation:
    self.a = gpflow.Parameter(a, dtype=gpflow.default_float())
    self.b = gpflow.Parameter(b, dtype=gpflow.default_float())
    # integer array defining the frequencies, ω_m = 2π m / (b - a):
    self.ms = np.arange(M)
def test_further(self):
    np.set_printoptions(suppress=True, precision=6)
    # X = np.linspace(0, 1, 4, dtype=float)[:, None]
    X = np.array([0.1, 0.2, 0.3, 0.4])[:, None]
    with tf.compat.v1.Session():
        BP_tf = tf.compat.v1.placeholder(dtype=gpflow.default_float(), shape=[])
        eZ0_tf = tf.compat.v1.placeholder(dtype=gpflow.default_float(),
                                          shape=(X.shape[0], X.shape[0] * 3))
        pZ0 = np.array([[0.7, 0.3], [0.1, 0.9], [0.5, 0.5], [0.85, 0.15]])
        eZ0 = pZ_construction_singleBP.expand_pZ0(pZ0)
        BP = 0.2
        pZ = tf.compat.v1.Session().run(
            pZ_construction_singleBP.make_matrix(X, BP_tf, eZ0_tf),
            feed_dict={BP_tf: BP, eZ0_tf: eZ0},
        )
        print("pZ0", pZ0)
        print("eZ0", eZ0)
        print("pZ", pZ)
        for r, c in zip(range(0, X.shape[0]), range(0, X.shape[0] * 3, 3)):
            print(r, c)
            print(X[r], pZ[r, c:c + 3], pZ0[r, :])
            if X[r] > BP:
                # after branch point should be prior
                assert np.allclose(pZ[r, c + 1:c + 3], pZ0[r, :], atol=1e-6), \
                    "must be the same! %s-%s" % (str(pZ[r, c:c + 3]), str(pZ0[r, :]))
            else:
                assert np.allclose(pZ[r, c:c + 3], np.array([1.0, 0.0, 0.0]), atol=1e-6), \
                    "must be the same! %s-%s" % (str(pZ[r, c:c + 3]), str(np.array([1.0, 0.0, 0.0])))
        eZ0z = pZ_construction_singleBP.expand_pZ0Zeros(pZ0)
        r = pZ_construction_singleBP.expand_pZ0PureNumpyZeros(eZ0z, BP, X)
        assert np.allclose(r, pZ, atol=1e-5)
        # try another
        pZ = tf.compat.v1.Session().run(
            pZ_construction_singleBP.make_matrix(X, BP_tf, eZ0_tf),
            feed_dict={BP_tf: 0.3, eZ0_tf: eZ0},
        )
        r = pZ_construction_singleBP.expand_pZ0PureNumpyZeros(eZ0z, 0.3, X)
        assert np.allclose(r, pZ, atol=1e-5)
def Kuu_sep_matern52_ldf(inducing_variable, kernel, jitter=None):
    """
    Kuu is just the spectral density evaluated at the eigen-frequencies
    (square root of eigenvalues)
    """
    inds, ω0, d = inducing_variable.inds, inducing_variable.ω0, inducing_variable.d
    eigen_frequencies = tf.cast(inds + 1, gpflow.default_float()) * ω0[None, :]
    S = SeparableMaternSpectralDensityND(
        eigen_frequencies,
        tf.constant(5 / 2, dtype=gpflow.default_float()),
        d,
        kernel.lengthscales)
    S = 1 / (kernel.variance * tf.clip_by_value(S, gpflow.config.default_jitter(), 1E6))
    return Diag(S, is_self_adjoint=True, is_positive_definite=True)
def test_unknown_size_inputs():
    """
    Test for #725 and #734. When the shape of the Gaussian's mean had at least
    one unknown parameter, `gauss_kl` would blow up. This happened because
    `tf.size` can only output types `tf.int32` or `tf.int64`.
    """
    mu = np.ones([1, 4], dtype=default_float())
    sqrt = np.ones([4, 1, 1], dtype=default_float())

    known_shape = gauss_kl(*map(tf.constant, [mu, sqrt]))
    unknown_shape = gauss_kl(mu, sqrt)

    np.testing.assert_allclose(known_shape, unknown_shape)
def test_dgp_predict() -> None:
    x = tf.constant(np.arange(5).reshape(-1, 1), dtype=gpflow.default_float())

    reference_model = single_layer_dgp_model(x)
    model = DeepGaussianProcess(single_layer_dgp_model(x))

    test_x = tf.constant([[2.5]], dtype=gpflow.default_float())

    ref_mean, ref_var = reference_model.predict_f(test_x)
    f_mean, f_var = model.predict(test_x)

    npt.assert_allclose(f_mean, ref_mean)
    npt.assert_allclose(f_var, ref_var)
def __init__(self, d: int, m: int, R=None, L=None, freq_max=None,
             freq_strategy='bottom_k_squares', start_ind=0):
    assert int(d) > 0
    self._d = int(d)
    assert int(m) > 0
    self._m = int(m)
    N = int(m)
    # if int(m) <= 1000:
    #     N = int(m)
    # else:
    #     N = int(max(np.sqrt(m), 1000))
    self.N = N
    if L is None:
        if freq_max is not None:
            L = tf.constant(self.d * [(1 / float(freq_max)) * float(m) * np.pi / 2],
                            dtype=gpflow.default_float())
        else:
            L = tf.constant(self.d * [1.], dtype=gpflow.default_float())
    else:
        L = tf.reshape(tf.constant(L, dtype=gpflow.default_float()), (-1))
        assert len(L) == self.d
    start_ind = int(start_ind)
    assert start_ind >= 0
    self._L = tf.stack(
        [tf.constant(float(l), dtype=gpflow.default_float()) for l in L])
    self._piover2 = tf.constant(np.pi / 2, dtype=gpflow.default_float())
    self._base_freq = self._piover2 / self.L
    if freq_strategy == 'bottom_k_squares':
        _, inds = bottomksum(tf.math.square(self._freqs_per_dim()), m + start_ind)
    elif freq_strategy == 'bottom_k':
        _, inds = bottomksum(self._freqs_per_dim(), m + start_ind)
    else:
        raise ValueError('unknown frequency selection strategy')
    self._inds = tf.transpose(inds[:, start_ind:])
    if R is not None:
        self._R = int(R)
        self.remainder = LaplacianDirichletFeatures(
            self.d,
            int(R),
            None,
            L=self.L,
            freq_strategy=freq_strategy,
            start_ind=self.num_inducing)
def test_gaussian_process_regression_update(gpr_interface_factory) -> None:
    x = tf.constant(np.arange(5).reshape(-1, 1), dtype=gpflow.default_float())
    model = gpr_interface_factory(x, _3x_plus_10(x))

    x_new = tf.concat([x, tf.constant([[10.0], [11.0]], dtype=gpflow.default_float())], 0)
    new_data = Dataset(x_new, _3x_plus_10(x_new))
    model.update(new_data)
    model.optimize(new_data)

    reference_model = _reference_gpr(x_new, _3x_plus_10(x_new))
    gpflow.optimizers.Scipy().minimize(
        reference_model.training_loss_closure(), reference_model.trainable_variables
    )

    internal_model = model.model
    npt.assert_allclose(internal_model.training_loss(), reference_model.training_loss(), rtol=1e-6)
def __init__(self, data, kernel, X=None, likelihood_variance=1e-4):
    gpflow.Module.__init__(self)
    if X is None:
        self.X = Parameter(data[0], name="DataX", dtype=gpflow.default_float())
    else:
        self.X = X
    self.Y = Parameter(data[1], name="DataY", dtype=gpflow.default_float())
    self.data = [self.X, self.Y]
    self.kernel = kernel
    self.likelihood = gpflow.likelihoods.Gaussian()
    self.likelihood.variance.assign(likelihood_variance)
    set_trainable(self.likelihood.variance, False)
def init_variational_params(self, num_inducing):
    q_mu = np.zeros((num_inducing, self.num_kernels, self.num_latent_gps))  # M x K x O
    self.q_mu = Parameter(q_mu, dtype=default_float())

    q_sqrt = []
    for _ in range(self.num_kernels):
        q_sqrt.append([
            np.eye(num_inducing, dtype=default_float())
            for _ in range(self.num_latent_gps)
        ])
    q_sqrt = np.array(q_sqrt)
    self.q_sqrt = Parameter(q_sqrt, transform=triangular())  # K x O x M x M
def predict_samples(
    self,
    inputs: TensorType,
    *,
    num_samples: Optional[int] = None,
    full_output_cov: bool = False,
    full_cov: bool = False,
    whiten: bool = False,
) -> tf.Tensor:
    """
    Make sample predictions at N test inputs, with input_dim = D and output_dim = Q.

    :param inputs: the inputs to predict at, shape [N, D].
    :param num_samples: the number of samples S to draw; the returned tensor has
        shape [S, N, Q] if S is not None, else [N, Q].
    :param full_output_cov: must be False, as it is not supported for now.
    :param full_cov: must be False, as it is not supported for now.
    :param whiten: must be False, as it is not sensible in Bayesian neural nets.
    """
    assert full_output_cov is False
    assert full_cov is False
    assert whiten is False

    _num_samples = num_samples or 1
    z = tf.random.normal((self.dim, _num_samples), dtype=default_float())  # [dim, S]
    if not self.is_mean_field:
        w = self.w_mu[:, None] + tf.matmul(self.w_sqrt, z)  # [dim, S]
    else:
        w = self.w_mu[:, None] + self.w_sqrt[:, None] * z  # [dim, S]

    N = tf.shape(inputs)[0]
    inputs_concat_1 = tf.concat(
        (inputs, tf.ones((N, 1), dtype=default_float())), axis=-1
    )  # [N, D+1]
    samples = tf.tensordot(
        inputs_concat_1,
        tf.reshape(tf.transpose(w), (_num_samples, self.input_dim + 1, self.output_dim)),
        [[-1], [1]],
    )  # [N, S, Q]
    if num_samples is None:
        samples = tf.squeeze(samples, axis=-2)  # [N, Q]
    else:
        samples = tf.transpose(samples, perm=[1, 0, 2])  # [S, N, Q]

    if self.activation is not None:
        samples = self.activation(samples)

    return samples
def test_gpflow_predictor_predict() -> None:
    model = _QuadraticPredictor()
    mean, variance = model.predict(tf.constant([[2.5]], gpflow.default_float()))
    assert mean.shape == [1, 1]
    assert variance.shape == [1, 1]
    npt.assert_allclose(mean, [[6.25]], rtol=0.01)
    npt.assert_allclose(variance, [[1.0]], rtol=0.01)
def reparameterize(mean, var, z, full_cov=False):
    """
    Implements the 'reparameterization trick' for the Gaussian, either full rank or diagonal.

    If z is a sample from N(0, 1), the output is a sample from N(mean, var).

    If full_cov=True then var must be of shape S,N,N,D and the full covariance is used.
    Otherwise var must be S,N,D and the operation is elementwise.

    :param mean: mean of shape S,N,D
    :param var: covariance of shape S,N,D or S,N,N,D
    :param z: samples from a unit Gaussian of shape S,N,D
    :param full_cov: bool to indicate whether var is of shape S,N,N,D or S,N,D
    :return: sample from N(mean, var) of shape S,N,D
    """
    if var is None:
        return mean

    if full_cov is False:
        return mean + z * (var + gpflow.default_jitter()) ** 0.5
    else:
        S, N, D = tf.shape(mean)[0], tf.shape(mean)[1], tf.shape(mean)[2]  # var is SNND
        mean = tf.transpose(mean, (0, 2, 1))  # SND -> SDN
        var = tf.transpose(var, (0, 3, 1, 2))  # SNND -> SDNN
        I = gpflow.default_jitter() * tf.eye(N, dtype=gpflow.default_float())[None, None, :, :]  # 11NN
        chol = tf.linalg.cholesky(var + I)  # SDNN
        z_SDN1 = tf.transpose(z, [0, 2, 1])[:, :, :, None]  # SND -> SDN1
        f = mean + tf.matmul(chol, z_SDN1)[:, :, :, 0]  # SDN(1)
        return tf.transpose(f, (0, 2, 1))  # SND
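# A minimal usage sketch for `reparameterize` above (added for illustration, not
# from the original source). It assumes tensorflow and gpflow are imported as
# elsewhere in this file; the shapes S=3, N=4, D=2 and the zero mean / unit
# variance inputs are arbitrary choices.
def _example_reparameterize():
    S, N, D = 3, 4, 2
    mean = tf.zeros((S, N, D), dtype=gpflow.default_float())
    var = tf.ones((S, N, D), dtype=gpflow.default_float())
    z = tf.random.normal((S, N, D), dtype=gpflow.default_float())
    # with full_cov=False this reduces to mean + z * sqrt(var + jitter),
    # i.e. an elementwise draw from N(mean, var)
    samples = reparameterize(mean, var, z)
    return samples  # shape [S, N, D]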
def test_find_best_model_initialization_improves_likelihood(
    gpflow_interface_factory: ModelFactoryType, dim: int
) -> None:
    x = tf.constant(
        np.arange(1, 1 + 10 * dim).reshape(-1, dim), dtype=gpflow.default_float()
    )  # shape: [10, dim]
    model, _ = gpflow_interface_factory(x, fnc_3x_plus_10(x)[:, 0:1])
    model.model.kernel = gpflow.kernels.RBF(variance=1.0, lengthscales=[0.2] * dim)

    if isinstance(model, (VariationalGaussianProcess, SparseVariational)):
        pytest.skip("find_best_model_initialization is only implemented for the GPR models.")

    model.model.kernel.variance.prior = tfp.distributions.LogNormal(
        loc=np.float64(-2.0), scale=np.float64(1.0)
    )
    upper = tf.cast([10.0] * dim, dtype=tf.float64)
    lower = upper / 100
    model.model.kernel.lengthscales = gpflow.Parameter(
        model.model.kernel.lengthscales, transform=tfp.bijectors.Sigmoid(low=lower, high=upper)
    )

    pre_init_loss = model.model.training_loss()
    model.find_best_model_initialization(100)
    post_init_loss = model.model.training_loss()

    npt.assert_array_less(post_init_loss, pre_init_loss)
def test_find_best_model_initialization_changes_params_with_sigmoid_bijectors(
    gpflow_interface_factory: ModelFactoryType, dim: int
) -> None:
    x = tf.constant(
        np.arange(1, 1 + 10 * dim).reshape(-1, dim), dtype=gpflow.default_float()
    )  # shape: [10, dim]
    model, _ = gpflow_interface_factory(x, fnc_3x_plus_10(x)[:, 0:1])
    model.model.kernel = gpflow.kernels.RBF(lengthscales=[0.2] * dim)

    if isinstance(model, (VariationalGaussianProcess, SparseVariational)):
        pytest.skip("find_best_model_initialization is only implemented for the GPR models.")

    upper = tf.cast([10.0] * dim, dtype=tf.float64)
    lower = upper / 100
    model.model.kernel.lengthscales = gpflow.Parameter(
        model.model.kernel.lengthscales, transform=tfp.bijectors.Sigmoid(low=lower, high=upper)
    )

    model.find_best_model_initialization(2)

    npt.assert_allclose(1.0, model.model.kernel.variance)
    npt.assert_array_equal(dim, model.model.kernel.lengthscales.shape)
    npt.assert_raises(
        AssertionError, npt.assert_allclose, [0.2, 0.2], model.model.kernel.lengthscales
    )
def sinc_scaled_components(x, N):
    """
    returns a matrix R(x) such that:
        sinc(x) ~= R(x).sum(-1)
    """
    ctype = dtype_to_ctype(default_float())
    dtype = default_float()
    pk = sinc_approx_poles(N)  # positive poles only
    num = -2 * np.pi * 1j * tf.exp(
        1j * tf.cast(2 * np.pi * tf.abs(tf.expand_dims(x, -1)), ctype) *
        tf.expand_dims(pk, 0))
    denom1 = -tf.expand_dims(pk, -1) - tf.expand_dims(pk, 0)
    denom2 = tf.linalg.set_diag(
        -tf.expand_dims(pk, -1) + tf.expand_dims(pk, 0),
        tf.ones(N, ctype))
    denom = tf.reduce_prod(4 * denom1 * denom2, -1)
    return num / tf.expand_dims(denom, 0)
def elbo(self, data):
    """
    Computes the evidence lower bound according to eq. (17) in the paper.

    :param data: Tuple of two tensors for input data X and labels Y.
    :return: Tensor representing ELBO.
    """
    X, Y = data
    num_data = X.shape[0]
    likelihood = tf.reduce_sum(self.expected_data_log_likelihood(X, Y))
    # scale loss term corresponding to minibatch size
    scale = tf.cast(num_data, gpflow.default_float())
    scale /= tf.cast(X.shape[0], gpflow.default_float())
    # Compute KL term
    KL = tf.reduce_sum([layer.KL() for layer in self.layers])
    # print(scale * likelihood, -KL)
    return scale * likelihood - KL
def test_inducing_points_with_variable_shape():
    N, M1, D, P = 50, 13, 3, 1
    X, Y = np.random.randn(N, D), np.random.randn(N, P)
    Z1 = np.random.randn(M1, D)

    # use explicit tf.Variable with None shape:
    iv = gpflow.inducing_variables.InducingPoints(
        tf.Variable(Z1, trainable=False, dtype=gpflow.default_float(), shape=(None, D)))
    # Note that we cannot have Z be trainable if we want to be able to change its shape;
    # TensorFlow optimizers expect shape to be known at construction time.

    m = gpflow.models.SGPR(data=(X, Y), kernel=gpflow.kernels.Matern32(), inducing_variable=iv)

    # Check 1: that we can still optimize with None shape
    opt = tf.optimizers.Adam()

    @tf.function
    def optimization_step():
        opt.minimize(m.training_loss, m.trainable_variables)

    optimization_step()

    # Check 2: that we can successfully assign a new Z with different number of inducing points!
    Z2 = np.random.randn(M1 + 1, D)
    m.inducing_variable.Z.assign(Z2)

    # Check 3: that we can also optimize with changed Z tensor
    optimization_step()
def call(self, inputs: TensorType, *args: List[Any], **kwargs: Dict[str, Any]) -> tf.Tensor:
    """
    The default behaviour upon calling this layer.

    This method calls the `tfp.layers.DistributionLambda` super-class
    `call` method, which constructs a `tfp.distributions.Distribution`
    for the predictive distributions at the input points
    (see :meth:`_make_distribution_fn`).
    You can pass this distribution to `tf.convert_to_tensor`, which will return
    samples from the distribution (see :meth:`_convert_to_tensor_fn`).

    This method also adds a layer-specific loss function, given by the KL divergence
    between this layer and the GP prior (scaled to per-datapoint).
    """
    outputs = super().call(inputs, *args, **kwargs)

    if kwargs.get("training"):
        loss_per_datapoint = self.prior_kl() / self.num_data
    else:
        # TF quirk: add_loss must always add a tensor to compile
        loss_per_datapoint = tf.constant(0.0, dtype=default_float())
    self.add_loss(loss_per_datapoint)

    # Metric names should be unique; otherwise they get overwritten if you
    # have multiple with the same name
    name = f"{self.name}_prior_kl" if self.name else "prior_kl"
    self.add_metric(loss_per_datapoint, name=name, aggregation="mean")

    return outputs