def __init__(self,
             loc=None,
             scale=None,
             validate_args=False,
             allow_nan_stats=True,
             name="VectorExponentialLinearOperator"):
  """Construct Vector Exponential distribution supported on a subset of `R^k`.

  The `batch_shape` is the broadcast shape between `loc` and `scale`
  arguments.

  The `event_shape` is given by the last dimension of the matrix implied by
  `scale`. The last dimension of `loc` (if provided) must broadcast with this.

  Recall that `covariance = scale @ scale.T`.

  Additional leading dimensions (if any) will index batches.

  Args:
    loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
      implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
      `b >= 0` and `k` is the event size.
    scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape
      `[B1, ..., Bb, k, k]`.
    validate_args: Python `bool`, default `False`. Whether to validate input
      with asserts. If `validate_args` is `False`, and the inputs are
      invalid, correct behavior is not guaranteed.
    allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
      exception if a statistic (e.g. mean/mode/etc...) is undefined for any
      batch member. If `True`, batch members with valid parameters leading to
      undefined statistics will return NaN for this statistic.
    name: The name to give Ops created by the initializer.

  Raises:
    ValueError: if `scale` is unspecified.
    TypeError: if not `scale.dtype.is_floating`
  """
  parameters = locals()
  if scale is None:
    raise ValueError("Missing required `scale` parameter.")
  if not scale.dtype.is_floating:
    raise TypeError("`scale` parameter must have floating-point dtype.")

  with ops.name_scope(name, values=[loc] + scale.graph_parents):
    # Since expand_dims doesn't preserve constant-ness, we obtain the
    # non-dynamic value if possible.
    loc = ops.convert_to_tensor(loc, name="loc") if loc is not None else loc
    batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
        loc, scale)

    super(VectorExponentialLinearOperator, self).__init__(
        distribution=exponential.Exponential(
            rate=array_ops.ones([], dtype=scale.dtype),
            allow_nan_stats=allow_nan_stats),
        bijector=bijectors.AffineLinearOperator(
            shift=loc, scale=scale, validate_args=validate_args),
        batch_shape=batch_shape,
        event_shape=event_shape,
        validate_args=validate_args,
        name=name)
    self._parameters = parameters
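# Usage sketch for the constructor above (hedged: assumes the contrib-era
# `tf.contrib.distributions` namespace and `tf.linalg.LinearOperatorDiag`;
# exact import paths vary across TF versions). Builds a batch of two
# 3-dimensional vector exponentials and shows the shapes inferred by
# shapes_from_loc_and_scale.
import tensorflow as tf

ds = tf.contrib.distributions
scale = tf.linalg.LinearOperatorDiag([[1., 2., 3.], [0.5, 0.5, 0.5]])
vexp = ds.VectorExponentialLinearOperator(loc=[0., 0., 0.], scale=scale)
# batch_shape == [2]: the broadcast of loc's leading dims ([]) with the
# operator's batch shape ([2]). event_shape == [3]: the operator's last dim.
print(vexp.batch_shape)  # TensorShape([2])
print(vexp.event_shape)  # TensorShape([3])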
def test_none_loc_static_scale(self):
  loc = None
  scale = linear_operator_diag.LinearOperatorDiag(np.ones((5, 1, 3)))
  batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
      loc, scale)
  self.assertEqual(tensor_shape.TensorShape([5, 1]), batch_shape)
  self.assertEqual(tensor_shape.TensorShape([3]), event_shape)
def test_none_loc_dynamic_scale(self):
  loc = None
  diag = array_ops.placeholder(dtypes.float64)
  scale = linear_operator_diag.LinearOperatorDiag(diag)
  with self.test_session() as sess:
    batch_shape, event_shape = sess.run(
        distribution_util.shapes_from_loc_and_scale(loc, scale),
        feed_dict={diag: np.ones((5, 1, 3))})
    self.assertAllEqual([5, 1], batch_shape)
    self.assertAllEqual([3], event_shape)
def test_dynamic_loc_static_scale(self):
  loc = array_ops.placeholder(dtypes.float64)
  diag = constant_op.constant(np.ones((5, 2, 3)))
  scale = linear_operator_diag.LinearOperatorDiag(diag)
  with self.test_session():
    batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
        loc, scale)
    # batch_shape depends on both args, and so is dynamic. Since loc did not
    # have static shape, we inferred event shape entirely from scale, and
    # this is available statically.
    self.assertAllEqual(
        [5, 2], batch_shape.eval(feed_dict={loc: np.zeros((2, 3))}))
    self.assertAllEqual([3], event_shape)
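# A minimal NumPy sketch of the fully-static shape logic the tests above
# exercise (an illustration, not the library implementation): batch_shape is
# the broadcast of loc's leading dims with scale's batch dims, and event_shape
# is the last dimension of the [..., k, k] operator.
import numpy as np

def static_shapes_from_loc_and_scale(loc_shape, scale_shape):
  event_shape = scale_shape[-1:]   # last dim of the k x k operator
  scale_batch = scale_shape[:-2]   # leading dims index batches
  if loc_shape is None:
    return tuple(scale_batch), tuple(event_shape)
  if loc_shape[-1] != scale_shape[-1]:
    # Mirrors the "could not be broadcast" ValueError tested further below.
    raise ValueError("loc/scale event sizes could not be broadcast")
  batch_shape = np.broadcast_shapes(loc_shape[:-1], scale_batch)
  return tuple(batch_shape), tuple(event_shape)

# Mirrors test_none_loc_static_scale: a diag operator built from
# np.ones((5, 1, 3)) has shape (5, 1, 3, 3), so batch_shape == (5, 1) and
# event_shape == (3,).
assert static_shapes_from_loc_and_scale(None, (5, 1, 3, 3)) == ((5, 1), (3,))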
def determine_batch_event_shapes(mix_loc, mix_scale, endpoint_affine):
  """Helper to infer batch_shape and event_shape."""
  with ops.name_scope(name="determine_batch_event_shapes"):
    mix_batch_shape = distribution_util.prefer_static_broadcast_shape(
        array_ops.shape(mix_loc, name="mix_loc_shape"),
        array_ops.shape(mix_scale, name="mix_scale_shape"))
    if isinstance(mix_batch_shape, tensor_shape.TensorShape):
      mix_batch_shape = mix_batch_shape.with_rank_at_least(1)[:-1]
    else:
      s = static_value(mix_batch_shape)
      if s is not None:
        mix_batch_shape = ops.convert_to_tensor(
            s[:-1], dtype=dtypes.int32, name="mix_batch_shape")
      else:
        mix_batch_shape = mix_batch_shape[:-1]
    # We broadcast with a 1D constant to automatically make the result a
    # TensorShape if possible.
    batch_shape = distribution_util.prefer_static_broadcast_shape(
        mix_batch_shape,
        constant_op.constant([], dtype=dtypes.int32, name="batch_shape"))
    event_shape = constant_op.constant(
        [], dtype=dtypes.int32, name="event_shape")
    for aff in endpoint_affine:
      b, e = distribution_util.shapes_from_loc_and_scale(aff.shift, aff.scale)
      if batch_shape is None:
        batch_shape = distribution_util.prefer_static_broadcast_shape(
            mix_batch_shape, b)
      else:
        batch_shape = distribution_util.prefer_static_broadcast_shape(
            batch_shape, b)
      event_shape = distribution_util.prefer_static_broadcast_shape(
          event_shape, e)
    if isinstance(batch_shape, tensor_shape.TensorShape):
      batch_shape = ops.convert_to_tensor(
          batch_shape.as_list(), dtype=dtypes.int32, name="batch_shape")
    if isinstance(event_shape, tensor_shape.TensorShape):
      event_shape = ops.convert_to_tensor(
          event_shape.as_list(), dtype=dtypes.int32, name="event_shape")
    return batch_shape, event_shape
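# Hedged sketch of the `prefer_static_broadcast_shape` helper the function
# above leans on (assumed behavior, inferred from how its result is used):
# broadcast statically and return a TensorShape when both shape tensors are
# known at graph-build time, otherwise fall back to a dynamic shape tensor.
import tensorflow as tf

def prefer_static_broadcast_shape_sketch(shape1, shape2):
  s1 = tf.get_static_value(shape1)
  s2 = tf.get_static_value(shape2)
  if s1 is not None and s2 is not None:
    # Both statically known: callers can slice/inspect the TensorShape result.
    return tf.broadcast_static_shape(tf.TensorShape(s1.tolist()),
                                     tf.TensorShape(s2.tolist()))
  # Otherwise emit an op that computes the broadcast shape at runtime.
  return tf.broadcast_dynamic_shape(shape1, shape2)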
def _create_dist(self):
  scale_diag = (tf.nn.softplus(self._scale_variable)
                if self._softplus_scale else self._scale_variable)
  scale = distribution_util.make_diag_scale(
      loc=self._loc_variable,
      scale_diag=scale_diag,
      validate_args=False,
      assert_positive=False)
  batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
      self._loc_variable, scale)
  return tfp.TransformedDistribution(
      distribution=tfp.Cauchy(loc=tf.zeros([], dtype=scale.dtype),
                              scale=tf.ones([], dtype=scale.dtype)),
      bijector=bijectors.AffineLinearOperator(
          shift=self._loc_variable, scale=scale),
      batch_shape=batch_shape,
      event_shape=event_shape,
      name="MultivariateCauchyDiag" +
           ("SoftplusScale" if self._softplus_scale else ""))
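# A NumPy sketch of what the transformed distribution above samples (an
# illustration of the affine push-forward, not the TF code path):
# y = loc + scale_diag * z, with z a vector of k iid standard Cauchy draws.
import numpy as np

def sample_mv_cauchy_diag(loc, scale_diag, n, rng=None):
  rng = rng or np.random.default_rng(0)
  z = rng.standard_cauchy(size=(n,) + np.shape(loc))  # iid standard Cauchy
  return loc + scale_diag * z                         # affine transform

samples = sample_mv_cauchy_diag(np.zeros(3), np.ones(3), n=1000)  # [1000, 3]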
def test_static_loc_static_scale_non_matching_event_size_raises(self):
  loc = constant_op.constant(np.zeros((2, 4)))
  scale = linear_operator_diag.LinearOperatorDiag(np.ones((5, 1, 3)))
  with self.assertRaisesRegexp(ValueError, "could not be broadcast"):
    distribution_util.shapes_from_loc_and_scale(loc, scale)
def __init__(self,
             df,
             loc=None,
             scale_identity_multiplier=None,
             scale_diag=None,
             scale_tril=None,
             scale_perturb_factor=None,
             scale_perturb_diag=None,
             validate_args=False,
             allow_nan_stats=True,
             name="VectorStudentT"):
  """Instantiates the vector Student's t-distributions on `R^k`.

  The `batch_shape` is the broadcast between `df.batch_shape` and
  `Affine.batch_shape` where `Affine` is constructed from `loc` and
  `scale_*` arguments.

  The `event_shape` is the event shape of `Affine.event_shape`.

  Args:
    df: Floating-point `Tensor`. The degrees of freedom of the
      distribution(s). `df` must contain only positive values. Must be
      scalar if `loc`, `scale_*` imply non-scalar batch_shape, or must have
      the same `batch_shape` implied by `loc`, `scale_*`.
    loc: Floating-point `Tensor`. If this is set to `None`, no `loc` is
      applied.
    scale_identity_multiplier: Floating-point rank-0 `Tensor` representing a
      scaling done to the identity matrix. When
      `scale_identity_multiplier = scale_diag = scale_tril = None` then
      `scale += IdentityMatrix`. Otherwise no scaled-identity-matrix is
      added to `scale`.
    scale_diag: Floating-point `Tensor` representing the diagonal matrix.
      `scale_diag` has shape [N1, N2, ..., k], which represents a k x k
      diagonal matrix. When `None` no diagonal term is added to `scale`.
    scale_tril: Floating-point `Tensor` representing a lower triangular
      matrix. `scale_tril` has shape [N1, N2, ..., k, k], which represents a
      k x k lower triangular matrix. When `None` no `scale_tril` term is
      added to `scale`. The upper triangular elements above the diagonal are
      ignored.
    scale_perturb_factor: Floating-point `Tensor` representing a factor
      matrix with last two dimensions of shape `(k, r)`. When `None`, no
      rank-r update is added to `scale`.
    scale_perturb_diag: Floating-point `Tensor` representing the diagonal
      matrix. `scale_perturb_diag` has shape [N1, N2, ..., r], which
      represents an r x r diagonal matrix. When `None` low-rank updates will
      take the form `scale_perturb_factor * scale_perturb_factor.T`.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.
  """
  parameters = locals()
  graph_parents = [
      df, loc, scale_identity_multiplier, scale_diag, scale_tril,
      scale_perturb_factor, scale_perturb_diag
  ]
  with ops.name_scope(name):
    with ops.name_scope("init", values=graph_parents):
      # The shape of the _VectorStudentT distribution is governed by the
      # relationship between df.batch_shape and affine.batch_shape. In
      # pseudocode the basic procedure is:
      #   if df.batch_shape is scalar:
      #     if affine.batch_shape is not scalar:
      #       # broadcast distribution.sample so
      #       # it has affine.batch_shape.
      #     self.batch_shape = affine.batch_shape
      #   else:
      #     if affine.batch_shape is scalar:
      #       # let affine broadcasting do its thing.
      #     self.batch_shape = df.batch_shape
      # All of the above magic is actually handled by
      # TransformedDistribution. Here we really only need to collect the
      # affine.batch_shape and decide what we're going to pass in to
      # TransformedDistribution's (override) batch_shape arg.
      affine = bijectors.Affine(
          shift=loc,
          scale_identity_multiplier=scale_identity_multiplier,
          scale_diag=scale_diag,
          scale_tril=scale_tril,
          scale_perturb_factor=scale_perturb_factor,
          scale_perturb_diag=scale_perturb_diag,
          validate_args=validate_args)
      distribution = student_t.StudentT(
          df=df,
          loc=array_ops.zeros([], dtype=affine.dtype),
          scale=array_ops.ones([], dtype=affine.dtype))
      batch_shape, override_event_shape = (
          distribution_util.shapes_from_loc_and_scale(
              affine.shift, affine.scale))
      override_batch_shape = distribution_util.pick_vector(
          distribution.is_scalar_batch(), batch_shape,
          constant_op.constant([], dtype=dtypes.int32))
      super(_VectorStudentT, self).__init__(
          distribution=distribution,
          bijector=affine,
          batch_shape=override_batch_shape,
          event_shape=override_event_shape,
          validate_args=validate_args,
          name=name)
      self._parameters = parameters
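# Hedged sketch of `distribution_util.pick_vector` as used above (assumed
# semantics: return `true_vector` when `cond` holds, else `false_vector`,
# resolving statically when possible). Here it picks between the affine batch
# shape and an empty override. The dynamic branch mirrors the classic
# concat-and-slice trick so the choice can be made inside the graph.
import tensorflow as tf

def pick_vector_sketch(cond, true_vector, false_vector):
  static_cond = tf.get_static_value(cond)
  if static_cond is not None:
    # cond is known at graph-build time: pick in Python, no extra ops.
    return true_vector if static_cond else false_vector
  n = tf.size(true_vector)
  joint = tf.concat([true_vector, false_vector], axis=0)
  # Slice offset 0 picks the first half; offset n picks the second half.
  return tf.slice(joint, [tf.where(cond, 0, n)], [tf.where(cond, n, -1)])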
def __init__(self,
             loc=None,
             scale_diag=None,
             scale_identity_multiplier=None,
             skewness=None,
             tailweight=None,
             distribution=None,
             validate_args=False,
             allow_nan_stats=True,
             name="VectorSinhArcsinhDiag"):
  """Construct VectorSinhArcsinhDiag distribution on `R^k`.

  The arguments `scale_diag` and `scale_identity_multiplier` combine to
  define the diagonal `scale` referred to in this class docstring:

  ```none
  scale = diag(scale_diag + scale_identity_multiplier * ones(k))
  ```

  The `batch_shape` is the broadcast shape between `loc` and `scale`
  arguments.

  The `event_shape` is given by the last dimension of the matrix implied by
  `scale`. The last dimension of `loc` (if provided) must broadcast with
  this.

  Additional leading dimensions (if any) will index batches.

  Args:
    loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
      implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]`
      where `b >= 0` and `k` is the event size.
    scale_diag: Non-zero, floating-point `Tensor` representing a diagonal
      matrix added to `scale`. May have shape `[B1, ..., Bb, k]`, `b >= 0`,
      and characterizes `b`-batches of `k x k` diagonal matrices added to
      `scale`. When both `scale_identity_multiplier` and `scale_diag` are
      `None` then `scale` is the `Identity`.
    scale_identity_multiplier: Non-zero, floating-point `Tensor`
      representing a scale-identity-matrix added to `scale`. May have shape
      `[B1, ..., Bb]`, `b >= 0`, and characterizes `b`-batches of scale
      `k x k` identity matrices added to `scale`. When both
      `scale_identity_multiplier` and `scale_diag` are `None` then `scale`
      is the `Identity`.
    skewness: Skewness parameter. Floating-point `Tensor` with shape
      broadcastable with `event_shape`.
    tailweight: Tailweight parameter. Floating-point `Tensor` with shape
      broadcastable with `event_shape`.
    distribution: `tf.Distribution`-like instance. Distribution from which
      `k` iid samples are used as input to transformation `F`. Default is
      `tf.distributions.Normal(loc=0., scale=1.)`. Must be a scalar-batch,
      scalar-event distribution. Typically
      `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it
      is a function of non-trainable parameters. WARNING: If you backprop
      through a VectorSinhArcsinhDiag sample and `distribution` is not
      `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then
      the gradient will be incorrect!
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    ValueError: if at most `scale_identity_multiplier` is specified.
  """
  parameters = dict(locals())

  with ops.name_scope(
      name,
      values=[
          loc, scale_diag, scale_identity_multiplier, skewness, tailweight
      ]) as name:
    loc = ops.convert_to_tensor(loc, name="loc") if loc is not None else loc
    tailweight = 1. if tailweight is None else tailweight
    has_default_skewness = skewness is None
    skewness = 0. if skewness is None else skewness

    # Recall, with Z a random variable,
    #   Y := loc + C * F(Z),
    #   F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight )
    #   F_0(Z) := Sinh( Arcsinh(Z) * tailweight )
    #   C := 2 * scale / F_0(2)

    # Construct shapes and 'scale' out of the scale_* and loc kwargs.
    # scale_linop is only an intermediary to:
    #  1. get shapes from looking at loc and the two scale args.
    #  2. combine scale_diag with scale_identity_multiplier, which gives us
    #     'scale', which in turn gives us 'C'.
    scale_linop = distribution_util.make_diag_scale(
        loc=loc,
        scale_diag=scale_diag,
        scale_identity_multiplier=scale_identity_multiplier,
        validate_args=False,
        assert_positive=False)
    batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
        loc, scale_linop)

    # scale_linop.diag_part() is efficient since it is a diag type linop.
    scale_diag_part = scale_linop.diag_part()
    dtype = scale_diag_part.dtype

    if distribution is None:
      distribution = normal.Normal(
          loc=array_ops.zeros([], dtype=dtype),
          scale=array_ops.ones([], dtype=dtype),
          allow_nan_stats=allow_nan_stats)
    else:
      asserts = distribution_util.maybe_check_scalar_distribution(
          distribution, dtype, validate_args)
      if asserts:
        scale_diag_part = control_flow_ops.with_dependencies(
            asserts, scale_diag_part)

    # Make the SAS bijector, 'F'.
    skewness = ops.convert_to_tensor(skewness, dtype=dtype, name="skewness")
    tailweight = ops.convert_to_tensor(
        tailweight, dtype=dtype, name="tailweight")
    f = bijectors.SinhArcsinh(skewness=skewness, tailweight=tailweight)
    if has_default_skewness:
      f_noskew = f
    else:
      f_noskew = bijectors.SinhArcsinh(
          skewness=skewness.dtype.as_numpy_dtype(0.), tailweight=tailweight)

    # Make the Affine bijector, Z --> loc + C * Z.
    c = 2 * scale_diag_part / f_noskew.forward(
        ops.convert_to_tensor(2, dtype=dtype))
    affine = bijectors.Affine(
        shift=loc, scale_diag=c, validate_args=validate_args)

    bijector = bijectors.Chain([affine, f])

    super(VectorSinhArcsinhDiag, self).__init__(
        distribution=distribution,
        bijector=bijector,
        batch_shape=batch_shape,
        event_shape=event_shape,
        validate_args=validate_args,
        name=name)
  self._parameters = parameters
  self._loc = loc
  self._scale = scale_linop
  self._tailweight = tailweight
  self._skewness = skewness
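# A NumPy sketch of the transform spelled out in the comments above:
# F(Z) = sinh((arcsinh(Z) + skewness) * tailweight), F_0 is F with zero
# skewness, and C = 2 * scale / F_0(2), so for skewness=0, tailweight=1 the
# output Y = loc + C * F(Z) reduces to the plain affine loc + scale * Z.
import numpy as np

def sas_forward(z, skewness=0.0, tailweight=1.0):
  return np.sinh((np.arcsinh(z) + skewness) * tailweight)

def vector_sas_transform(z, loc, scale_diag, skewness, tailweight):
  c = 2.0 * scale_diag / sas_forward(2.0, skewness=0.0, tailweight=tailweight)
  return loc + c * sas_forward(z, skewness, tailweight)

z = np.random.default_rng(0).standard_normal(3)
y = vector_sas_transform(z, loc=np.zeros(3), scale_diag=np.ones(3),
                         skewness=0.0, tailweight=1.0)
assert np.allclose(y, z)  # identity-scale, zero-skew case recovers Z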
def __init__(self,
             loc=None,
             scale_diag=None,
             scale_identity_multiplier=None,
             skewness=None,
             tailweight=None,
             distribution=None,
             validate_args=False,
             allow_nan_stats=True,
             name="VectorSinhArcsinhDiag"):
  """Construct VectorSinhArcsinhDiag distribution on `R^k`.

  The arguments `scale_diag` and `scale_identity_multiplier` combine to
  define the diagonal `scale` referred to in this class docstring:

  ```none
  scale = diag(scale_diag + scale_identity_multiplier * ones(k))
  ```

  The `batch_shape` is the broadcast shape between `loc` and `scale`
  arguments.

  The `event_shape` is given by the last dimension of the matrix implied by
  `scale`. The last dimension of `loc` (if provided) must broadcast with
  this.

  Additional leading dimensions (if any) will index batches.

  Args:
    loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
      implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]`
      where `b >= 0` and `k` is the event size.
    scale_diag: Non-zero, floating-point `Tensor` representing a diagonal
      matrix added to `scale`. May have shape `[B1, ..., Bb, k]`, `b >= 0`,
      and characterizes `b`-batches of `k x k` diagonal matrices added to
      `scale`. When both `scale_identity_multiplier` and `scale_diag` are
      `None` then `scale` is the `Identity`.
    scale_identity_multiplier: Non-zero, floating-point `Tensor`
      representing a scale-identity-matrix added to `scale`. May have shape
      `[B1, ..., Bb]`, `b >= 0`, and characterizes `b`-batches of scale
      `k x k` identity matrices added to `scale`. When both
      `scale_identity_multiplier` and `scale_diag` are `None` then `scale`
      is the `Identity`.
    skewness: Skewness parameter. Floating-point `Tensor` with shape
      broadcastable with `event_shape`.
    tailweight: Tailweight parameter. Floating-point `Tensor` with shape
      broadcastable with `event_shape`.
    distribution: `tf.Distribution`-like instance. Distribution from which
      `k` iid samples are used as input to transformation `F`. Default is
      `ds.Normal(0., 1.)`. Must be a scalar-batch, scalar-event
      distribution. Typically
      `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it
      is a function of non-trainable parameters. WARNING: If you backprop
      through a VectorSinhArcsinhDiag sample and `distribution` is not
      `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then
      the gradient will be incorrect!
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    ValueError: if at most `scale_identity_multiplier` is specified.
  """
  parameters = locals()

  with ops.name_scope(
      name,
      values=[
          loc, scale_diag, scale_identity_multiplier, skewness, tailweight
      ]):
    loc = ops.convert_to_tensor(loc, name="loc") if loc is not None else loc
    tailweight = 1. if tailweight is None else tailweight
    skewness = 0. if skewness is None else skewness

    # Recall, with Z ~ Normal(0, 1),
    #   Y := loc + C * F(Z),
    #   F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight )
    #   C := 2 * scale / F(2)

    # Construct shapes and 'scale' out of the scale_* and loc kwargs.
    # scale_linop is only an intermediary to:
    #  1. get shapes from looking at loc and the two scale args.
    #  2. combine scale_diag with scale_identity_multiplier, which gives us
    #     'scale', which in turn gives us 'C'.
    scale_linop = distribution_util.make_diag_scale(
        loc=loc,
        scale_diag=scale_diag,
        scale_identity_multiplier=scale_identity_multiplier,
        validate_args=False,
        assert_positive=False)
    batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
        loc, scale_linop)

    # scale_linop.diag_part() is efficient since it is a diag type linop.
    scale_diag_part = scale_linop.diag_part()
    dtype = scale_diag_part.dtype

    if distribution is None:
      distribution = normal.Normal(
          loc=array_ops.zeros([], dtype=dtype),
          scale=array_ops.ones([], dtype=dtype),
          allow_nan_stats=allow_nan_stats)
    else:
      asserts = distribution_util.maybe_check_scalar_distribution(
          distribution, dtype, validate_args)
      if asserts:
        scale_diag_part = control_flow_ops.with_dependencies(
            asserts, scale_diag_part)

    # Make the SAS bijector, 'F'.
    skewness = ops.convert_to_tensor(skewness, dtype=dtype, name="skewness")
    tailweight = ops.convert_to_tensor(
        tailweight, dtype=dtype, name="tailweight")
    f = bijectors.SinhArcsinh(
        skewness=skewness, tailweight=tailweight, event_ndims=1)

    # Make the Affine bijector, Z --> loc + C * Z.
    c = 2 * scale_diag_part / f.forward(
        ops.convert_to_tensor(2, dtype=dtype))
    affine = bijectors.Affine(
        shift=loc, scale_diag=c, validate_args=validate_args, event_ndims=1)

    bijector = bijectors.Chain([affine, f])

    super(VectorSinhArcsinhDiag, self).__init__(
        distribution=distribution,
        bijector=bijector,
        batch_shape=batch_shape,
        event_shape=event_shape,
        validate_args=validate_args,
        name=name)
  self._parameters = parameters
  self._loc = loc
  self._scale = scale_linop
  self._tailweight = tailweight
  self._skewness = skewness
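# Hypothetical usage of the class above (hedged: assumes the contrib-era
# `tf.contrib.distributions.VectorSinhArcsinhDiag`; import paths differ
# across versions). Heavier-than-Gaussian tails and rightward skew per
# coordinate.
import tensorflow as tf

ds = tf.contrib.distributions
dist = ds.VectorSinhArcsinhDiag(
    loc=tf.zeros([3]),
    scale_diag=tf.ones([3]),
    skewness=0.5,     # shifts mass to the right in each coordinate
    tailweight=2.0)   # tailweight > 1 fattens the tails
samples = dist.sample(10)  # shape [10, 3]; batch_shape [], event_shape [3]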