def _build(self, X): """Build the graph of this layer.""" n_samples, input_dim = self._get_X_dims(X) W_shape, b_shape = self._weight_shapes(input_dim) # Layer weights self.pW = _make_prior(self.std, self.pW, W_shape) self.qW = _make_posterior(self.std, self.qW, W_shape, self.full) # Regularizers KL = kl_sum(self.qW, self.pW) # Linear layer Wsamples = _sample_W(self.qW, n_samples) Net = tf.matmul(X, Wsamples) # Optional bias if self.use_bias or not (self.prior_b is None and self.post_b is None): # Layer intercepts self.pb = _make_prior(self.std, self.pb, b_shape) self.qb = _make_posterior(self.std, self.qb, b_shape, False) # Regularizers KL += kl_sum(self.qb, self.pb) # Linear layer bsamples = tf.expand_dims(_sample_W(self.qb, n_samples), 1) Net += bsamples return Net, KL
def test_kl_normal_normal():
    """Test Normal/Normal KL."""
    dim = (5, 10)
    mu = np.zeros(dim, dtype=np.float32)
    std = 1.0

    q = tf.distributions.Normal(mu, std)

    # Test 0 KL
    p = tf.distributions.Normal(mu, std)
    KL0 = kl_sum(q, p)

    # Test diff var
    std1 = 2.0
    p = tf.distributions.Normal(mu, std1)
    KL1 = kl_sum(q, p)
    rKL1 = 0.5 * ((std / std1)**2 - 1 + np.log((std1 / std)**2)) * np.prod(dim)

    # Test diff mu
    mu1 = np.ones(dim, dtype=np.float32)
    p = tf.distributions.Normal(mu1, std)
    KL2 = kl_sum(q, p)
    rKL2 = 0.5 * (np.sum((mu1 - mu)**2) / std**2)

    tc = tf.test.TestCase()
    with tc.test_session():
        kl0 = KL0.eval()
        assert np.isscalar(kl0)
        assert kl0 == 0.
        assert np.allclose(KL1.eval(), rKL1)
        assert np.allclose(KL2.eval(), rKL2)
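For reference, the rKL1 and rKL2 expressions above are instances of the closed-form KL between univariate normals, KL(N(mu0, s0^2) || N(mu1, s1^2)) = log(s1/s0) + (s0^2 + (mu0 - mu1)^2) / (2 s1^2) - 1/2, summed over elements. A small NumPy sketch (not part of the library) that reproduces both constants:

import numpy as np

def normal_kl_sum(mu0, s0, mu1, s1):
    # Elementwise Normal/Normal KL, summed over all dimensions.
    kl = np.log(s1 / s0) + (s0**2 + (mu0 - mu1)**2) / (2 * s1**2) - 0.5
    return np.sum(kl)

mu = np.zeros((5, 10))
assert np.isclose(normal_kl_sum(mu, 1.0, mu, 2.0),
                  0.5 * (0.25 - 1 + np.log(4)) * mu.size)              # rKL1
assert np.isclose(normal_kl_sum(mu, 1.0, mu + 1, 1.0), 0.5 * mu.size)  # rKL2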
def _build(self, X): """Build the graph of this layer.""" n_samples, (height, width, channels) = self._get_X_dims(X) W_shape, b_shape = self._weight_shapes(channels) # Layer weights self.pW = _make_prior(self.std, self.pW, W_shape) self.qW = _make_posterior(self.std, self.qW, W_shape, False) # Regularizers KL = kl_sum(self.qW, self.pW) # Linear layer Wsamples = _sample_W(self.qW, n_samples, False) Net = tf.map_fn(lambda args: tf.nn.conv2d( *args, padding=self.padding, strides=self.strides), elems=(X, Wsamples), dtype=tf.float32) # Optional bias if self.use_bias or not (self.prior_b is None and self.post_b is None): # Layer intercepts self.pb = _make_prior(self.std, self.pb, b_shape) self.qb = _make_posterior(self.std, self.qb, b_shape, False) # Regularizers KL += kl_sum(self.qb, self.pb) # Linear layer bsamples = tf.reshape(_sample_W(self.qb, n_samples, False), [n_samples, 1, 1, 1, self.filters]) Net += bsamples return Net, KL
def _build(self, X): """Build the graph of this layer.""" n_samples, (input_dim,) = self._get_X_dims(X) W_shp, b_shp = self._weight_shapes(input_dim) self.pstd, self.qstd = initialise_stds(input_dim, self.output_dim, self.prior_std0, self.learn_prior, "dense") # Layer weights self.pW = _make_prior(self.pstd, W_shp) self.qW = _make_posterior(self.qstd, W_shp, self.full, "dense") # Regularizers KL = kl_sum(self.qW, self.pW) # Linear layer Wsamples = _sample_W(self.qW, n_samples) Net = tf.matmul(X, Wsamples) # Optional bias if self.use_bias: # Layer intercepts self.pb = _make_prior(self.pstd, b_shp) self.qb = _make_posterior(self.qstd, b_shp, False, "dense_bias") # Regularizers KL += kl_sum(self.qb, self.pb) # Linear layer bsamples = tf.expand_dims(_sample_W(self.qb, n_samples), 1) Net += bsamples return Net, KL
def _build(self, X): """Build the graph of this layer.""" n_samples, (input_dim,) = self._get_X_dims(X) W_shape, _ = self._weight_shapes(self.n_categories) n_batch = tf.shape(X)[1] self.pstd, self.qstd = initialise_stds(input_dim, self.output_dim, self.prior_std0, self.learn_prior, "embed") # Layer weights self.pW = _make_prior(self.pstd, W_shape) self.qW = _make_posterior(self.qstd, W_shape, self.full, "embed") # Index into the relevant weights rather than using sparse matmul Wsamples = _sample_W(self.qW, n_samples) features = tf.map_fn(lambda wx: tf.gather(*wx, axis=0), (Wsamples, X), dtype=Wsamples.dtype) # Now concatenate the resulting features on the last axis f_dims = int(np.prod(features.shape[2:])) # need this for placeholders Net = tf.reshape(features, [n_samples, n_batch, f_dims]) # Regularizers KL = kl_sum(self.qW, self.pW) return Net, KL
def _build(self, X): """Build the graph of this layer.""" n_samples, (height, width, channels) = self._get_X_dims(X) W_shp, b_shp = self._weight_shapes(channels) # get effective IO shapes, DAN's fault if this is wrong receptive_field = np.product(W_shp[:-2]) n_inputs = receptive_field * channels n_outputs = receptive_field * self.filters self.pstd, self.qstd = initialise_stds(n_inputs, n_outputs, self.prior_std0, self.learn_prior, "conv2d") # Layer weights self.pW = _make_prior(self.pstd, W_shp) self.qW = _make_posterior(self.qstd, W_shp, False, "conv") # Regularizers KL = kl_sum(self.qW, self.pW) # Linear layer Wsamples = _sample_W(self.qW, n_samples, False) Net = tf.map_fn( lambda args: tf.nn.conv2d(*args, padding=self.padding, strides=self.strides), elems=(X, Wsamples), dtype=tf.float32) # Optional bias if self.use_bias: # Layer intercepts self.pb = _make_prior(self.pstd, b_shp) self.qb = _make_posterior(self.qstd, b_shp, False, "conv_bias") # Regularizers KL += kl_sum(self.qb, self.pb) # Linear layer bsamples = tf.reshape(_sample_W(self.qb, n_samples, False), [n_samples, 1, 1, 1, self.filters]) Net += bsamples return Net, KL
def weights(self, input_dim, n_features, dtype=np.float32): """Generate the random fourier weights for this kernel. Parameters ---------- input_dim : int the input dimension to this layer. n_features : int the number of unique random features, the actual output dimension of this layer will be ``2 * n_features``. dtype : np.dtype the dtype of the features to draw, this should match the observations. Returns ------- P : ndarray the random weights of the fourier features of shape ``(input_dim, n_features)``. KL : Tensor, float the KL penalty associated with the parameters in this kernel. """ self.lenscale, self.lenscale_post = _init_lenscale(self.given_lenscale, self.learn_lenscale, input_dim) dim = (input_dim, n_features) # Setup the prior, lenscale may be a variable, so dont use prior_normal pP_scale = self.__len2std(self.lenscale, n_features) pP = tf.distributions.Normal( loc=tf.zeros(dim), scale=pP_scale) # Initialise the posterior qP_scale = 1.0 / self.lenscale_post if qP_scale.ndim > 0: qP_scale = np.repeat(qP_scale[:, np.newaxis], n_features, axis=1) qP = norm_posterior(dim=dim, std0=qP_scale, suffix="kernel") KL = kl_sum(qP, pP) # We implement the VAR-FIXED method here from Cutajar et. al 2017, so # we pre-generate and fix the standard normal samples e = self._random_state.randn(*dim).astype(dtype) P = qP.mean() + qP.stddev() * e return P, KL
def test_kl_gaussian_gaussian(random):
    """Test Gaussian/Gaussian KL."""
    dim = (5, 10)
    Dim = (5, 10, 10)

    mu0 = random.randn(*dim).astype(np.float32)
    L0 = random_chol(Dim)
    q = tfp.distributions.MultivariateNormalTriL(mu0, L0)

    mu1 = random.randn(*dim).astype(np.float32)
    L1 = random_chol(Dim)
    p = tfp.distributions.MultivariateNormalTriL(mu1, L1)

    KL = kl_sum(q, p)
    KLr = KLdiv(mu0, L0, mu1, L1)

    tc = tf.test.TestCase()
    with tc.test_session():
        assert np.allclose(KL.eval(), KLr)
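The reference value comes from the closed-form KL between multivariate Gaussians. A sketch of what a helper like KLdiv presumably computes for one pair of distributions, given the Cholesky factors L0, L1 of the covariances (hypothetical, for reference only):

import numpy as np

def gaussian_kl(mu0, L0, mu1, L1):
    # KL(N(mu0, L0 L0^T) || N(mu1, L1 L1^T)) in closed form.
    k = len(mu0)
    S1_inv = np.linalg.inv(L1 @ L1.T)
    d = mu1 - mu0
    logdet = 2 * (np.sum(np.log(np.diag(L1))) - np.sum(np.log(np.diag(L0))))
    return 0.5 * (np.trace(S1_inv @ (L0 @ L0.T)) + d @ S1_inv @ d - k + logdet)

mu = np.zeros(4)
L = np.linalg.cholesky(np.eye(4) + 0.1)
assert np.isclose(gaussian_kl(mu, L, mu, L), 0.0)   # identical distributions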
def weights(self, input_dim, n_features, dtype=np.float32): """Generate the random fourier weights for this kernel. Parameters ---------- input_dim : int the input dimension to this layer. n_features : int the number of unique random features, the actual output dimension of this layer will be ``2 * n_features``. dtype : np.dtype the dtype of the features to draw, this should match the observations. Returns ------- P : ndarray the random weights of the fourier features of shape ``(input_dim, n_features)``. KL : Tensor, float the KL penalty associated with the parameters in this kernel. """ dim = (input_dim, n_features) # Setup the prior, lenscale may be a variable, so dont use prior_normal pP = tf.distributions.Normal(loc=tf.zeros(dim), scale=self.__len2std(self.lenscale)) # Initialise the posterior if self.lenscale_post is None: self.lenscale_post = np.sqrt(1 / input_dim) qP = norm_posterior(dim=dim, std0=self.__len2std(self.lenscale_post)) KL = kl_sum(qP, pP) # We implement the VAR-FIXED method here from Cutajar et. al 2017, so # we pre-generate and fix the standard normal samples rand = np.random.RandomState(next(seedgen)) e = rand.randn(*dim).astype(dtype) P = qP.mean() + qP.stddev() * e return P, KL
def _build(self, X): """Build the graph of this layer.""" n_samples, input_dim = self._get_X_dims(X) W_shape, _ = self._weight_shapes(self.n_categories) assert input_dim == 1, "X must be a *column* of indices!" # Layer weights self.pW = self._make_prior(self.pW, W_shape) self.qW = self._make_posterior(self.qW, W_shape) # Index into the relevant weights rather than using sparse matmul Wsamples = self._sample_W(self.qW, n_samples) Net = tf.gather(Wsamples, X[0, :, 0], axis=1) # Regularizers KL = kl_sum(self.qW, self.pW) return Net, KL
def _build(self, X):
    # Extract perturbed predictions
    n_samples = tf.shape(X)[0] // 2
    X_orig, X_pert = X[:n_samples], X[n_samples:]

    # Build Dense Layer
    F, KL = super()._build(X_orig)

    # Build a latent function density
    qWmean = _tile2samples(n_samples, tf.transpose(self.qW.mean()))
    qWvar = _tile2samples(n_samples, tf.transpose(self.qW.variance()))
    f_loc = tf.matmul(X_pert, qWmean)
    if self.use_bias:
        f_loc += self.qb.mean()
    f_scale = tf.sqrt(tf.matmul(X_pert ** 2, qWvar))
    f_post = tf.distributions.Normal(f_loc, f_scale)

    # Calculate NCP loss
    KL += kl_sum(f_post, self.f_prior) / tf.to_float(n_samples)

    return F, KL
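Why the latent-function moments take this form (a sketch, assuming an independent mean-field posterior over the weights): for f = x @ W, the mean is x @ E[W] and the variance is (x ** 2) @ Var[W] feature-wise, which is what the squared matmul inside f_scale computes. A quick Monte Carlo check with hypothetical sizes:

import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(4)                                  # one (perturbed) input
w_mean, w_var = rng.randn(4, 2), rng.rand(4, 2)   # per-weight posterior moments

W = w_mean + np.sqrt(w_var) * rng.randn(100000, 4, 2)   # weight draws
f = np.einsum('d,ndk->nk', x, W)
assert np.allclose(f.mean(axis=0), x @ w_mean, atol=0.1)
assert np.allclose(f.var(axis=0), (x ** 2) @ w_var, atol=0.1)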
def test_kl_gaussian_normal(random):
    """Test Gaussian/Normal KL."""
    dim = (5, 10)
    Dim = (5, 10, 10)

    mu0 = random.randn(*dim).astype(np.float32)
    L0 = random_chol(Dim)
    q = tfp.distributions.MultivariateNormalTriL(mu0, L0)

    mu1 = random.randn(*dim).astype(np.float32)
    std1 = 1.0
    L1 = [(std1 * np.eye(dim[1])).astype(np.float32) for _ in range(dim[0])]
    p = tf.distributions.Normal(mu1, std1)

    KL = kl_sum(q, p)
    KLr = KLdiv(mu0, L0, mu1, L1)

    tc = tf.test.TestCase()
    with tc.test_session():
        kl = KL.eval()
        assert np.isscalar(kl)
        assert np.allclose(kl, KLr)