def testBijector(self):
  """Checks forward, inverse and log-det-Jacobians of `TransformDiagonal`.

  A batch of three random 4x4 float32 matrices is pushed through
  `TransformDiagonal(Exp)`. The expected output equals the input except that
  each matrix diagonal is exponentiated. The forward/inverse log-det-Jacobians
  are compared with those of `Exp` applied to the extracted diagonals.
  """
  inputs = np.float32(np.random.randn(3, 4, 4))

  # Build the reference output with NumPy: only the diagonals change.
  expected = inputs.copy()
  for expected_mat, input_mat in zip(expected, inputs):
    np.fill_diagonal(expected_mat, np.exp(np.diag(input_mat)))

  exp = tfb.Exp()
  bijector = tfb.TransformDiagonal(diag_bijector=exp)

  forward_out = self.evaluate(bijector.forward(inputs))
  self.assertAllClose(expected, forward_out)

  inverse_out = self.evaluate(bijector.inverse(expected))
  self.assertAllClose(inputs, inverse_out)

  fldj = self.evaluate(
      bijector.forward_log_det_jacobian(inputs, event_ndims=2))
  ildj = self.evaluate(
      bijector.inverse_log_det_jacobian(expected, event_ndims=2))

  # The matrix-level Jacobian terms must match `Exp` acting on the diagonal
  # vectors alone.
  input_diags = np.array([np.diag(mat) for mat in inputs])
  expected_fldj = self.evaluate(
      exp.forward_log_det_jacobian(input_diags, event_ndims=1))
  self.assertAllEqual(fldj, expected_fldj)

  output_diags = np.array([np.diag(mat) for mat in expected])
  expected_ildj = self.evaluate(
      exp.inverse_log_det_jacobian(output_diags, event_ndims=1))
  self.assertAllEqual(ildj, expected_ildj)
def testTheoreticalFldj(self, data):
  """Compares the analytic FLDJ of `TransformDiagonal` with a theoretical one.

  Hypothesis draws the matrix dimension (0 to 10), a diagonal bijector that
  passes the `_preserves_vector_dim(dim)` filter, and a batch shape whose
  non-positive entries are replaced by 1. The bijector is then checked with
  `assert_bijective_and_finite`, and its forward log-det-Jacobian is compared
  against `bijector_test_util.get_fldj_theoretical`.

  Args:
    data: Hypothesis data object used to draw the test inputs.
  """
  dim = data.draw(hps.integers(min_value=0, max_value=10))

  diag_bijector_strategy = bijector_hps.unconstrained_bijectors(
      max_forward_event_ndims=1,
      must_preserve_event_ndims=True).filter(_preserves_vector_dim(dim))
  diag_bijector = data.draw(diag_bijector_strategy)
  logging.info('Using diagonal bijector %s %s', diag_bijector.name,
               diag_bijector)

  bijector = tfb.TransformDiagonal(diag_bijector=diag_bijector)

  def ensure_nonzero_batch(shape):
    # Replace any non-positive batch dimension with 1.
    return [1 if d <= 0 else d for d in shape]

  batch_shape = data.draw(tfp_hps.shapes().map(ensure_nonzero_batch))
  shape = batch_shape + [dim, dim]

  x = np.random.randn(*shape).astype(np.float64)
  y = self.evaluate(bijector.forward(x))
  bijector_test_util.assert_bijective_and_finite(
      bijector,
      x,
      y,
      eval_func=self.evaluate,
      event_ndims=2,
      inverse_event_ndims=2,
      rtol=1e-5)

  analytic_fldj = bijector.forward_log_det_jacobian(x, event_ndims=2)
  # For constant-jacobian bijectors, the zero fldj may not be broadcast.
  analytic_fldj = analytic_fldj + tf.zeros(tf.shape(x)[:-2], dtype=x.dtype)

  theoretical_fldj = bijector_test_util.get_fldj_theoretical(
      bijector, x, event_ndims=2, inverse_event_ndims=2)
  self.assertAllClose(
      self.evaluate(theoretical_fldj),
      self.evaluate(analytic_fldj),
      atol=1e-5,
      rtol=1e-5)
def testTheoreticalFldjNormalCDF(self):
  """Regression test: FLDJ of `TransformDiagonal(NormalCDF)` on a 0x0 input."""
  # b/137367959 test failure trigger case (resolved by using
  # experimental_use_pfor=False as fallback instead of primary in
  # bijector_test_util.get_fldj_theoretical)
  bijector = tfb.TransformDiagonal(diag_bijector=tfb.NormalCDF())
  empty_matrix = np.zeros([0, 0])

  analytic_fldj = bijector.forward_log_det_jacobian(
      empty_matrix, event_ndims=2)
  theoretical_fldj = bijector_test_util.get_fldj_theoretical(
      bijector, empty_matrix, event_ndims=2, inverse_event_ndims=2)

  self.assertAllClose(
      self.evaluate(theoretical_fldj),
      self.evaluate(analytic_fldj),
      atol=1e-5,
      rtol=1e-5)
def testJacobian(self):
  """Checks the FLDJ of `CholeskyToInvCholesky` against a numerical estimate.

  The analytic forward log-det-Jacobian is compared with the value returned
  by `self._get_fldj_numerical` for 1x1, 2x2 and 3x3 lower-triangular float64
  inputs. Inputs and outputs are mapped to vectors by first applying the
  inverse of `Exp` to the diagonal and then inverting `FillTriangular`.
  """
  # `Chain` applies its bijectors right-to-left.
  cholesky_to_vector = tfb.Chain([
      tfb.Invert(tfb.FillTriangular()),
      tfb.TransformDiagonal(tfb.Invert(tfb.Exp())),
  ])
  bijector = tfb.CholeskyToInvCholesky()

  test_matrices = [
      np.array([[2.]], dtype=np.float64),
      np.array([[2., 0.],
                [3., 4.]], dtype=np.float64),
      np.array([[2., 0., 0.],
                [3., 4., 0.],
                [5., 6., 7.]], dtype=np.float64),
  ]
  for chol in test_matrices:
    analytic = bijector.forward_log_det_jacobian(chol, event_ndims=2)
    numerical = self._get_fldj_numerical(
        bijector,
        chol,
        event_ndims=2,
        eps=1.e-6,
        input_to_vector=cholesky_to_vector,
        output_to_vector=cholesky_to_vector)
    analytic_, numerical_ = self.evaluate([analytic, numerical])
    self.assertAllClose(analytic_, numerical_)
def _random_chol(self, *shape):
  """Returns a random lower-triangular factor and the matrix it generates.

  Args:
    *shape: Dimensions forwarded to `self._rng.rand`.

  Returns:
    A pair `(chol, sigma)` of evaluated values. `chol` is the lower-triangular
    part of a random matrix whose diagonal was passed through `Softplus`, and
    `sigma` is `chol` multiplied by its own adjoint.
  """
  softplus_diagonal = tfb.TransformDiagonal(tfb.Softplus())
  raw = self._rng.rand(*shape)
  # band_part(-1, 0) keeps the lower triangle and zeroes the rest.
  lower = tf.linalg.band_part(softplus_diagonal(raw), -1, 0)
  covariance = tf.matmul(lower, lower, adjoint_b=True)
  return self.evaluate(lower), self.evaluate(covariance)
def _random_tril_matrix(self, shape):
  """Returns a random lower-triangular tensor with a softplus-ed diagonal.

  Args:
    shape: Sequence of dimensions, unpacked into `self.rng.rand`.

  Returns:
    The lower-triangular part of a random matrix whose diagonal entries were
    transformed by `Softplus`.
  """
  softplus_diagonal = tfb.TransformDiagonal(tfb.Softplus())
  raw = self.rng.rand(*shape)
  # band_part(-1, 0) keeps the lower triangle and zeroes the rest.
  return tf.linalg.band_part(softplus_diagonal(raw), -1, 0)
def bijectors(draw, bijector_name=None, batch_shape=None, event_dim=None,
              enable_vars=False):
  """Strategy for drawing Bijectors.

  The emitted bijector may be a basic bijector or an `Invert` of a basic
  bijector, but not a compound like `Chain`.

  Args:
    draw: Hypothesis strategy sampler supplied by `@hps.composite`.
    bijector_name: Optional Python `str`. If given, the produced bijectors
      will all have this type. If omitted, Hypothesis chooses one from the
      whitelist `TF2_FRIENDLY_BIJECTORS`.
    batch_shape: An optional `TensorShape`. The batch shape of the resulting
      bijector. Hypothesis will pick one if omitted.
    event_dim: Optional Python int giving the size of each of the event
      dimensions of the bijector's parameters. It is shared across all
      parameters (and is also used as the permutation length for `Permute`).
      If omitted, Hypothesis will choose one between 2 and 6.
    enable_vars: TODO(bjp): Make this `True` all the time and put variable
      initialization in slicing_test. If `False`, the returned parameters are
      all `tf.Tensor`s and not {`tf.Variable`, `tfp.util.DeferredTensor`,
      `tfp.util.TransformedVariable`}.

  Returns:
    bijectors: A strategy for drawing bijectors with the specified
      `batch_shape` (or an arbitrary one if omitted).
  """
  # Fill in whatever the caller left unspecified. The order of the draws
  # (name, batch shape, event dim) is kept fixed.
  if bijector_name is None:
    bijector_name = draw(hps.sampled_from(TF2_FRIENDLY_BIJECTORS))
  if batch_shape is None:
    batch_shape = draw(tfp_hps.shapes())
  if event_dim is None:
    event_dim = draw(hps.integers(min_value=2, max_value=6))

  if bijector_name == 'Invert':
    # Wrap any other whitelisted bijector; `Invert` itself is excluded so the
    # recursion stops after one level.
    invertible_names = sorted(set(TF2_FRIENDLY_BIJECTORS) - {'Invert'})
    inner_name = draw(hps.sampled_from(invertible_names))
    inner = draw(
        bijectors(
            bijector_name=inner_name,
            batch_shape=batch_shape,
            event_dim=event_dim,
            enable_vars=enable_vars))
    return tfb.Invert(inner, validate_args=True)

  if bijector_name == 'TransformDiagonal':
    diag_names = sorted(TRANSFORM_DIAGONAL_WHITELIST)
    inner_name = draw(hps.sampled_from(diag_names))
    # The diagonal bijector is drawn with an empty batch shape.
    inner = draw(
        bijectors(
            bijector_name=inner_name,
            batch_shape=(),
            event_dim=event_dim,
            enable_vars=enable_vars))
    return tfb.TransformDiagonal(inner, validate_args=True)

  if bijector_name == 'Inline':
    scale = tf.Variable(1., name='scale') if enable_vars else 2.
    affine = tfb.AffineScalar(scale=scale)
    inline = tfb.Inline(
        forward_fn=affine.forward,
        inverse_fn=affine.inverse,
        forward_log_det_jacobian_fn=lambda x: affine.forward_log_det_jacobian(  # pylint: disable=g-long-lambda
            x, event_ndims=affine.forward_min_event_ndims),
        forward_min_event_ndims=affine.forward_min_event_ndims,
        is_constant_jacobian=affine.is_constant_jacobian,
    )
    # Attach the wrapped bijector to the `Inline` instance as attribute `b`.
    inline.b = affine
    return inline

  if bijector_name == 'DiscreteCosineTransform':
    dct_type = draw(hps.integers(min_value=2, max_value=3))
    return tfb.DiscreteCosineTransform(validate_args=True, dct_type=dct_type)

  if bijector_name == 'PowerTransform':
    power = draw(hps.floats(min_value=0., max_value=10.))
    return tfb.PowerTransform(validate_args=True, power=power)

  if bijector_name == 'Permute':
    event_ndims = draw(hps.integers(min_value=1, max_value=2))
    axis = draw(hps.integers(min_value=-event_ndims, max_value=-1))
    # This is a permutation of dimensions within an axis.
    # (Contrast with `Transpose` below.)
    within_axis_perm = draw(hps.permutations(np.arange(event_dim)))
    return tfb.Permute(within_axis_perm, axis=axis)

  if bijector_name == 'Reshape':
    event_shape_out = draw(tfp_hps.shapes(min_ndims=1))
    # TODO(b/142135119): Wanted to draw general input and output shapes like
    # the following, but Hypothesis complained about filtering out too many
    # things.
    # event_shape_in = draw(tfp_hps.shapes(min_ndims=1))
    # hp.assume(event_shape_out.num_elements() == event_shape_in.num_elements())
    event_shape_in = [event_shape_out.num_elements()]
    return tfb.Reshape(
        event_shape_out=event_shape_out,
        event_shape_in=event_shape_in,
        validate_args=True)

  if bijector_name == 'Transpose':
    event_ndims = draw(hps.integers(min_value=0, max_value=2))
    # This is a permutation of axes.
    # (Contrast with `Permute` above.)
    axes_perm = draw(hps.permutations(np.arange(event_ndims)))
    return tfb.Transpose(perm=axes_perm)

  # Generic case: draw constructor parameters and build the bijector by name.
  params = draw(
      broadcasting_params(
          bijector_name,
          batch_shape,
          event_dim=event_dim,
          enable_vars=enable_vars))
  bijector_cls = getattr(tfb, bijector_name)
  return bijector_cls(validate_args=True, **params)
def build_trainable_highway_flow(width,
                                 residual_fraction_initial_value=0.5,
                                 activation_fn=None,
                                 gate_first_n=None,
                                 seed=None,
                                 validate_args=False):
  """Builds a HighwayFlow parameterized by trainable variables.

  The variables are transformed to enforce the following parameter
  constraints:

  - `residual_fraction` is bounded between 0 and 1 (via `Sigmoid`).
  - `upper_diagonal_weights_matrix` is a randomly initialized `width x width`
    matrix produced by `FillScaleTriL` with a `Softplus` diagonal bijector,
    i.e. a triangular matrix with positive diagonal.
  - `lower_diagonal_weights_matrix` is a randomly initialized `width x width`
    lower triangular matrix with ones on the diagonal.
  - `bias` is a randomly initialized vector of size `width`.

  All variables share the dtype of `residual_fraction_initial_value`
  (`float32` when a plain Python number is given).

  Args:
    width: Input dimension of the bijector.
    residual_fraction_initial_value: Initial value for gating parameter, must
      be between 0 and 1.
    activation_fn: Callable invertible activation function
      (e.g., `tf.nn.softplus`), or `None`.
    gate_first_n: Decides which part of the input should be gated (useful for
      example when using auxiliary variables).
    seed: Seed for random initialization of the weights.
    validate_args: Python `bool`. Whether to validate input with runtime
      assertions.
      Default value: `False`.

  Returns:
    trainable_highway_flow: The initialized bijector.
  """
  residual_fraction_initial_value = tf.convert_to_tensor(
      residual_fraction_initial_value,
      dtype_hint=tf.float32,
      name='residual_fraction_initial_value')
  dtype = residual_fraction_initial_value.dtype

  bias_seed, upper_seed, lower_seed = samplers.split_seed(seed, n=3)

  # Unconstrained vector -> strictly lower triangular matrix (padded to
  # `width x width`) -> add one to the diagonal. The shift is created in
  # `dtype` so the bijector also works for non-float32 variables.
  lower_bijector = tfb.Chain([
      tfb.TransformDiagonal(
          diag_bijector=tfb.Shift(tf.ones([], dtype=dtype))),
      tfb.Pad(paddings=[(1, 0), (0, 1)]),
      tfb.FillTriangular()
  ])
  # Sample directly in `dtype`: the variables below are created with
  # `dtype=dtype`, and an initial value in a different dtype would not match.
  unconstrained_lower_initial_values = samplers.normal(
      shape=lower_bijector.inverse_event_shape([width, width]),
      mean=0.,
      stddev=.01,
      dtype=dtype,
      seed=lower_seed)

  upper_bijector = tfb.FillScaleTriL(
      diag_bijector=tfb.Softplus(), diag_shift=None)
  unconstrained_upper_initial_values = samplers.normal(
      shape=upper_bijector.inverse_event_shape([width, width]),
      mean=0.,
      stddev=.01,
      dtype=dtype,
      seed=upper_seed)

  bias_initial_value = samplers.normal(
      (width,), mean=0., stddev=0.01, dtype=dtype, seed=bias_seed)

  return HighwayFlow(
      residual_fraction=util.TransformedVariable(
          initial_value=residual_fraction_initial_value,
          bijector=tfb.Sigmoid(),
          dtype=dtype),
      activation_fn=activation_fn,
      bias=tf.Variable(bias_initial_value, dtype=dtype),
      upper_diagonal_weights_matrix=util.TransformedVariable(
          initial_value=upper_bijector.forward(
              unconstrained_upper_initial_values),
          bijector=upper_bijector,
          dtype=dtype),
      lower_diagonal_weights_matrix=util.TransformedVariable(
          initial_value=lower_bijector.forward(
              unconstrained_lower_initial_values),
          bijector=lower_bijector,
          dtype=dtype),
      gate_first_n=gate_first_n,
      validate_args=validate_args)
def _random_pd_matrix(self, *shape):
  """Returns an evaluated random matrix of the form `L @ adjoint(L)`.

  `L` is the lower-triangular part of a random matrix whose diagonal was
  passed through `Softplus`. The random numbers come from `rng`, which is not
  defined in this method and is expected to be available in the enclosing
  scope.

  Args:
    *shape: Dimensions forwarded to `rng.rand`.

  Returns:
    The evaluated product of `L` with its own adjoint.
  """
  softplus_diagonal = tfb.TransformDiagonal(tfb.Softplus())
  raw = rng.rand(*shape)
  # band_part(-1, 0) keeps the lower triangle and zeroes the rest.
  lower = tf.linalg.band_part(softplus_diagonal(raw), -1, 0)
  product = tf.matmul(lower, lower, adjoint_b=True)
  return self.evaluate(product)
def _random_tril_matrix(self, shape, seed):
  """Returns a random lower-triangular tensor with a transformed diagonal.

  Args:
    shape: Shape passed to `tf.random.normal`.
    seed: Seed passed to `tf.random.normal`.

  Returns:
    The lower-triangular part of a normally distributed matrix of dtype
    `self.dtype` whose diagonal was transformed by the bijector obtained from
    applying `Shift(1)` to `Softplus`.
  """
  # Calling a bijector on another bijector composes the two.
  shifted_softplus = tfb.Shift(shift=self.dtype(1.))(tfb.Softplus())
  diag_transform = tfb.TransformDiagonal(shifted_softplus)
  raw = tf.random.normal(shape=shape, seed=seed, dtype=self.dtype)
  # band_part(-1, 0) keeps the lower triangle and zeroes the rest.
  return tf.linalg.band_part(diag_transform(raw), -1, 0)