def test_q_sqrt_constraints(inducing_points, kernel, mu, white):
    """
    Check that the prior KL treats a triangular-constrained ``q_sqrt`` and an
    unconstrained ``q_sqrt`` identically.

    Both parameters are created from the same lower-triangular values. The test then
    verifies that the KL evaluated with each of them agrees, and that after subtracting
    the KL gradient from each parameter's unconstrained variable the two parameters still
    hold the same values. This matters because the KL enforces that q_sqrt is triangular.
    """
    initial_tril = np.tril(rng.randn(Ln, Nn, Nn))
    q_sqrt_constrained = Parameter(initial_tril, transform=triangular())
    q_sqrt_unconstrained = Parameter(initial_tril)

    def parameter_gap():
        # Element-wise difference between the two parameters as a NumPy array.
        return (q_sqrt_constrained - q_sqrt_unconstrained).numpy()

    def kl_then_gradient_step(q_sqrt):
        # Evaluate the KL under a tape, then take one gradient step on the raw variable.
        with tf.GradientTape() as tape:
            kl_value = prior_kl(inducing_points, kernel, mu, q_sqrt, whiten=white)
        step = tape.gradient(kl_value, q_sqrt.unconstrained_variable)
        q_sqrt.unconstrained_variable.assign_sub(step)
        return kl_value

    # Sanity check: both parameters start out identical.
    assert_allclose(parameter_gap(), 0)

    # Constrained first, unconstrained second.
    kl_constrained = kl_then_gradient_step(q_sqrt_constrained)
    kl_unconstrained = kl_then_gradient_step(q_sqrt_unconstrained)

    # The KL values were computed before each step, so they compare the initial states.
    assert_allclose(kl_constrained - kl_unconstrained, 0)

    # Equal gradients imply the parameters still coincide after the step.
    assert_allclose(parameter_gap(), 0)
def test_q_sqrt_constraints(Xdata, Xnew, kernel, mu, white):
    """
    Check that ``conditional`` treats a triangular-constrained ``q_sqrt`` and an
    unconstrained ``q_sqrt`` identically.

    Both parameters are created from the same lower-triangular values. The test then
    verifies that the predictive variance returned by ``conditional`` agrees for both, and
    that after subtracting the variance gradient from each parameter's unconstrained
    variable the two parameters still hold the same values. This is important to match the
    behaviour of the KL, which enforces that q_sqrt is triangular.
    """
    initial_tril = np.tril(rng.randn(Ln, Nn, Nn))
    q_sqrt_constrained = Parameter(initial_tril, transform=triangular())
    q_sqrt_unconstrained = Parameter(initial_tril)

    def parameter_gap():
        # Element-wise difference between the two parameters as a NumPy array.
        return (q_sqrt_constrained - q_sqrt_unconstrained).numpy()

    def variance_then_gradient_step(q_sqrt):
        # Evaluate the conditional variance under a tape, then take one gradient step
        # on the raw variable. The conditional mean is not needed here.
        with tf.GradientTape() as tape:
            _, variance = conditional(Xnew, Xdata, kernel, mu, q_sqrt=q_sqrt, white=white)
        step = tape.gradient(variance, q_sqrt.unconstrained_variable)
        q_sqrt.unconstrained_variable.assign_sub(step)
        return variance

    # Sanity check: both parameters start out identical.
    assert_allclose(parameter_gap(), 0)

    # Constrained first, unconstrained second.
    variance_constrained = variance_then_gradient_step(q_sqrt_constrained)
    variance_unconstrained = variance_then_gradient_step(q_sqrt_unconstrained)

    # The variances were computed before each step, so they compare the initial states.
    assert_allclose(variance_constrained - variance_unconstrained, 0)

    # Equal gradients imply the parameters still coincide after the step.
    assert_allclose(parameter_gap(), 0)
def __init__(
    self,
    input_dim: int,
    output_dim: int,
    num_data: int,
    w_mu: Optional[np.ndarray] = None,
    w_sqrt: Optional[np.ndarray] = None,
    activation: Optional[Callable] = None,
    is_mean_field: bool = True,
    temperature: float = 1e-4,  # TODO is this intentional?
):
    """
    Validate the configuration and create the variational weight parameters.

    The supplied ``w_mu`` / ``w_sqrt`` are only shape-checked and recorded (as
    ``w_mu_ini`` / ``w_sqrt_ini``) here; the ``w_mu`` and ``w_sqrt`` parameters themselves
    are created with placeholder values (zeros, or ones for the mean-field square root).

    :param input_dim: The input dimension (excluding bias) of this layer.
    :param output_dim: The output dimension of this layer.
    :param num_data: The number of points in the training dataset (used for scaling the
        KL regulariser).
    :param w_mu: Initial value of the variational mean for weights + bias, with shape
        ``[(input_dim + 1) * output_dim]``. If not specified, this defaults to
        `xavier_initialization_numpy` for the weights and zero for the bias (that default
        is not applied inside this constructor).
    :param w_sqrt: Initial value of the variational Cholesky of the (co)variance for
        weights + bias, with shape ``[dim]`` if mean field and ``[dim, dim]`` otherwise,
        where ``dim = (input_dim + 1) * output_dim``. If not specified, this defaults to
        1e-5 * Identity (again, not applied inside this constructor).
    :param activation: The activation function. If not specified, this defaults to the
        identity.
    :param is_mean_field: Determines whether the approximation to the weight posterior is
        mean field. Must be consistent with the shape of ``w_sqrt``, if specified.
    :param temperature: For cooling (< 1.0) or heating (> 1.0) the posterior. Must be
        strictly positive.
    """
    super().__init__(dtype=default_float())

    assert input_dim >= 1
    assert output_dim >= 1
    assert num_data >= 1

    # One weight per (input, output) pair, plus one bias per output.
    num_weights = (input_dim + 1) * output_dim

    if w_mu is not None:
        assert w_mu.shape == (num_weights,)

    if w_sqrt is not None:
        if is_mean_field:
            assert w_sqrt.shape == (num_weights,)
        else:
            assert w_sqrt.shape == (num_weights, num_weights)

    assert temperature > 0.0

    self.input_dim = input_dim
    self.output_dim = output_dim
    self.num_data = num_data

    # Kept for later initialisation; not used to populate the parameters below.
    self.w_mu_ini = w_mu
    self.w_sqrt_ini = w_sqrt

    self.activation = activation
    self.is_mean_field = is_mean_field
    self.temperature = temperature

    self.dim = num_weights
    self.full_output_cov = False
    self.full_cov = False

    self.w_mu = Parameter(
        np.zeros((self.dim,)),
        dtype=default_float(),
        name="w_mu",
    )  # [dim]

    # Mean field: a positive vector; otherwise: a lower-triangular matrix.
    if self.is_mean_field:
        w_sqrt_placeholder = np.ones((self.dim,))
        w_sqrt_transform = positive()
    else:
        w_sqrt_placeholder = np.zeros((self.dim, self.dim))
        w_sqrt_transform = triangular()

    self.w_sqrt = Parameter(
        w_sqrt_placeholder,
        transform=w_sqrt_transform,
        dtype=default_float(),
        name="w_sqrt",
    )  # [dim, dim] or [dim]
def __init__(
    self,
    kernel: MultioutputKernel,
    inducing_variable: MultioutputInducingVariables,
    num_data: int,
    mean_function: Optional[MeanFunction] = None,
    *,
    num_samples: Optional[int] = None,
    full_cov: bool = False,
    full_output_cov: bool = False,
    num_latent_gps: int = None,
    whiten: bool = True,
    name: Optional[str] = None,
    verbose: bool = False,
):
    """
    Store the layer configuration and create the variational parameters
    :attr:`q_mu` (zeros) and :attr:`q_sqrt` (one identity matrix per latent GP).

    :param kernel: The multioutput kernel for this layer.
    :param inducing_variable: The inducing features for this layer.
    :param num_data: The number of points in the training dataset (see :attr:`num_data`).
    :param mean_function: The mean function that will be applied to the inputs.
        Default: :class:`~gpflow.mean_functions.Identity`.

        .. note:: The Identity mean function requires the input and output
            dimensionality of this layer to be the same. If you want to change the
            dimensionality in a layer, you may want to provide a
            :class:`~gpflow.mean_functions.Linear` mean function instead.

    :param num_samples: The number of samples to draw when converting the
        :class:`~tfp.layers.DistributionLambda` into a `tf.Tensor`, see
        :meth:`_convert_to_tensor_fn`. Will be stored in the :attr:`num_samples`
        attribute. If `None` (the default), draw a single sample without prefixing the
        sample shape (see :class:`tfp.distributions.Distribution`'s `sample()
        <https://www.tensorflow.org/probability/api_docs/python/tfp/distributions/Distribution#sample>`_
        method).
    :param full_cov: Sets default behaviour of calling this layer (:attr:`full_cov`
        attribute): If `False` (the default), only predict marginals (diagonal of
        covariance) with respect to inputs. If `True`, predict full covariance over
        inputs.
    :param full_output_cov: Sets default behaviour of calling this layer
        (:attr:`full_output_cov` attribute): If `False` (the default), only predict
        marginals (diagonal of covariance) with respect to outputs. If `True`, predict
        full covariance over outputs.
    :param num_latent_gps: The number of (latent) GPs in the layer (which can be
        different from the number of outputs, e.g. with a
        :class:`~gpflow.kernels.LinearCoregionalization` kernel). This is used to
        determine the size of the variational parameters :attr:`q_mu` and
        :attr:`q_sqrt`. If possible, it is inferred from the *kernel* and
        *inducing_variable*; the explicit value is only used when that inference fails.
    :param whiten: If `True` (the default), uses the whitened parameterisation of the
        inducing variables; see :attr:`whiten`.
    :param name: The name of this layer.
    :param verbose: The verbosity mode. Set this parameter to `True` to show debug
        information.
    :raises GPLayerIncompatibilityException: If the compatibility check fails and no
        explicit ``num_latent_gps`` was given.
    """
    super().__init__(
        make_distribution_fn=self._make_distribution_fn,
        convert_to_tensor_fn=self._convert_to_tensor_fn,
        dtype=default_float(),
        name=name,
    )

    # Fall back to the identity mean function when none is supplied.
    mean_function = Identity() if mean_function is None else mean_function

    self.kernel = kernel
    self.inducing_variable = inducing_variable
    self.num_data = num_data
    self.mean_function = mean_function

    self.full_output_cov = full_output_cov
    self.full_cov = full_cov
    self.whiten = whiten
    self.verbose = verbose

    try:
        num_inducing, inferred_num_latent_gps = verify_compatibility(
            kernel, mean_function, inducing_variable
        )
        # TODO: if num_latent_gps is not None, verify it is equal to self.num_latent_gps
    except GPLayerIncompatibilityException as e:
        # Without an explicit num_latent_gps there is nothing to fall back on.
        if num_latent_gps is None:
            raise e

        if self.verbose:
            warnings.warn(
                "Could not verify the compatibility of the `kernel`, `inducing_variable` "
                "and `mean_function`. We advise using `gpflux.helpers.construct_*` to create "
                "compatible kernels and inducing variables. As "
                f"`num_latent_gps={num_latent_gps}` has been specified explicitly, this will "
                "be used to create the `q_mu` and `q_sqrt` parameters."
            )

        # Trust the caller: size the parameters from the explicit arguments.
        num_inducing = len(inducing_variable)
        self.num_latent_gps = num_latent_gps
    else:
        self.num_latent_gps = inferred_num_latent_gps

    q_mu_initial = np.zeros((num_inducing, self.num_latent_gps))
    self.q_mu = Parameter(
        q_mu_initial,
        dtype=default_float(),
        name=f"{self.name}_q_mu" if self.name else "q_mu",
    )  # [num_inducing, num_latent_gps]

    # One identity matrix per latent GP, stacked along a new leading axis.
    q_sqrt_initial = np.stack([np.eye(num_inducing) for _ in range(self.num_latent_gps)])
    self.q_sqrt = Parameter(
        q_sqrt_initial,
        transform=triangular(),
        dtype=default_float(),
        name=f"{self.name}_q_sqrt" if self.name else "q_sqrt",
    )  # [num_latent_gps, num_inducing, num_inducing]

    self.num_samples = num_samples
def __init__(
    self,
    input_dim: int,
    output_dim: int,
    num_data: int,
    w_mu: Optional[np.ndarray] = None,
    w_sqrt: Optional[np.ndarray] = None,
    activation: Optional[Callable] = None,
    is_mean_field: bool = True,
    temperature: float = 1e-4,
    returns_samples: bool = True,
):
    """
    A Bayesian dense layer for variational Bayesian neural nets. This layer holds the
    weight mean and sqrt as well as the temperature for cooling (or heating) the
    posterior.

    The supplied ``w_mu`` / ``w_sqrt`` are only shape-checked and recorded (as
    ``w_mu_ini`` / ``w_sqrt_ini``) here; the ``w_mu`` and ``w_sqrt`` parameters themselves
    are created with placeholder values (zeros, or ones for the mean-field square root).

    :param input_dim: The layer's input dimension (excluding bias)
    :param output_dim: The layer's output dimension
    :param num_data: number of data points
    :param w_mu: Initial value of the variational mean (weights + bias), with shape
        ``[(input_dim + 1) * output_dim]``
    :param w_sqrt: Initial value of the variational Cholesky (covering weights + bias),
        with shape ``[dim]`` if mean field and ``[dim, dim]`` otherwise, where
        ``dim = (input_dim + 1) * output_dim``
    :param activation: The type of activation function (None is linear)
    :param is_mean_field: Determines mean field approximation of the weight posterior
    :param temperature: For cooling or heating the posterior; must be strictly positive
    :param returns_samples: If True, return samples on calling the layer,
        Else return mean and variance
    """
    super().__init__(dtype=default_float())

    assert input_dim >= 1
    assert output_dim >= 1
    assert num_data >= 1

    # One weight per (input, output) pair, plus one bias per output.
    num_weights = (input_dim + 1) * output_dim

    if w_mu is not None:
        assert w_mu.shape == (num_weights,)

    if w_sqrt is not None:
        if is_mean_field:
            assert w_sqrt.shape == (num_weights,)
        else:
            assert w_sqrt.shape == (num_weights, num_weights)

    assert temperature > 0.0

    self.input_dim = input_dim
    self.output_dim = output_dim
    self.num_data = num_data

    # Kept for later initialisation; not used to populate the parameters below.
    self.w_mu_ini = w_mu
    self.w_sqrt_ini = w_sqrt

    self.activation = activation
    self.is_mean_field = is_mean_field
    self.temperature = temperature
    self.returns_samples = returns_samples

    self.dim = num_weights
    self.full_output_cov = False
    self.full_cov = False

    self.w_mu = Parameter(
        np.zeros((self.dim,)),
        dtype=default_float(),
        name="w_mu",
    )  # [dim]

    # Mean field: a positive vector; otherwise: a lower-triangular matrix.
    if self.is_mean_field:
        w_sqrt_placeholder = np.ones((self.dim,))
        w_sqrt_transform = positive()
    else:
        w_sqrt_placeholder = np.zeros((self.dim, self.dim))
        w_sqrt_transform = triangular()

    self.w_sqrt = Parameter(
        w_sqrt_placeholder,
        transform=w_sqrt_transform,
        dtype=default_float(),
        name="w_sqrt",
    )  # [dim, dim] or [dim]