def log_prob(self, response, predicted_linear_response, name=None):
  """Computes `D(param=mean(r)).log_prob(response)` for linear response, `r`.

  Both inputs are converted to tensors of one common dtype before being
  forwarded to `self._log_prob`.

  Args:
    response: `float`-like `Tensor` representing observed ("actual")
      responses.
    predicted_linear_response: `float`-like `Tensor` corresponding to
      `tf.matmul(model_matrix, weights)`.
    name: Python `str` used as TF namescope for ops created by member
      functions. Default value: `None` (i.e., 'log_prob').

  Returns:
    log_prob: `Tensor` with shape and dtype of `predicted_linear_response`
      representing the distribution prescribed log-probability of the
      observed `response`s.
  """
  with self._name_scope(
      name, 'log_prob', [response, predicted_linear_response]):
    dtype = dtype_util.common_dtype([response, predicted_linear_response])
    response = tf.convert_to_tensor(
        response, dtype=dtype, name='response')
    # Apply the common dtype here as well; otherwise a non-Tensor input
    # (e.g. a Python list) could be converted to a dtype that differs from
    # `response`'s.
    predicted_linear_response = tf.convert_to_tensor(
        predicted_linear_response,
        dtype=dtype,
        name='predicted_linear_response')
    return self._log_prob(response, predicted_linear_response)
def _do_maximum_mean(samples, envelope, high, name=None):
  """Shared implementation behind maximum_mean and minimum_mean.

  Args:
    samples: Samples whose 0th dimension indexes the individual draws.
    envelope: Amount of probability mass that is moved to `high`.
    high: Value that receives the `envelope` mass.
    name: A name for this operation (optional).

  Returns:
    The weighted sum of the sorted samples plus `envelope * high`.
  """
  with tf.name_scope(name, "do_maximum_mean", [samples, envelope, high]):
    dtype = dtype_util.common_dtype([samples, envelope, high], tf.float32)
    samples = tf.convert_to_tensor(samples, name="samples", dtype=dtype)
    envelope = tf.convert_to_tensor(envelope, name="envelope", dtype=dtype)
    high = tf.convert_to_tensor(high, name="high", dtype=dtype)

    # `_batch_sort_vector` wants the sample dimension rightmost, so rotate
    # dimension 0 of `samples` to the end before sorting.
    ndims = tf.rank(samples)
    rotate_left = tf.concat([tf.range(1, ndims), [0]], axis=0)
    ordered = _batch_sort_vector(tf.transpose(samples, rotate_left))

    # The maximum mean removes `envelope`-worth of probability from the
    # smallest samples and places it on the maximum value instead:
    #   - the smallest k samples, where `k/n < envelope`, are dropped;
    #   - the index-k sample keeps a `1/n - (envelope - k/n)` share;
    #   - every remaining sample keeps its full `1/n` share;
    #   - `envelope * high` is added at the end.
    # `weights` below is the vectorized, batched mask implementing this.
    count = tf.cast(tf.shape(ordered)[-1], dtype=dtype)
    share = 1. / count
    running_share = share * tf.range(1, count + 1, dtype=dtype)
    weights = tf.clip_by_value(
        running_share - envelope[..., tf.newaxis],
        clip_value_min=0.,
        clip_value_max=share)
    weighted_total = tf.reduce_sum(ordered * weights, axis=-1)
    return weighted_total + envelope * high
def __init__(self,
             total_count,
             logits=None,
             probs=None,
             validate_args=False,
             allow_nan_stats=True,
             name="Multinomial"):
  """Initialize a batch of Multinomial distributions.

  Args:
    total_count: Non-negative floating point tensor with shape broadcastable
      to `[N1,..., Nm]` with `m >= 0`. Defines this as a batch of
      `N1 x ... x Nm` different Multinomial distributions. Its components
      should be equal to integer values.
    logits: Floating point tensor representing unnormalized
      log-probabilities of a positive event with shape broadcastable to
      `[N1,..., Nm, K]`, `m >= 0`, and the same dtype as `total_count`.
      Defines this as a batch of `N1 x ... x Nm` different `K` class
      Multinomial distributions. Only one of `logits` or `probs` should be
      passed in.
    probs: Positive floating point tensor with shape broadcastable to
      `[N1,..., Nm, K]`, `m >= 0`, and same dtype as `total_count`. Defines
      this as a batch of `N1 x ... x Nm` different `K` class Multinomial
      distributions. `probs`'s components in the last portion of its shape
      should sum to `1`. Only one of `logits` or `probs` should be passed
      in.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.
  """
  # Must stay the first statement so only the constructor arguments are
  # captured.
  parameters = dict(locals())
  with tf.name_scope(name, values=[total_count, logits, probs]) as scope:
    dtype = dtype_util.common_dtype([total_count, logits, probs], tf.float32)
    count = tf.convert_to_tensor(
        total_count, name="total_count", dtype=dtype)
    if validate_args:
      count = distribution_util.embed_check_nonnegative_integer_form(count)
    self._total_count = count
    self._logits, self._probs = distribution_util.get_logits_and_probs(
        logits=logits,
        probs=probs,
        multidimensional=True,
        validate_args=validate_args,
        name=scope,
        dtype=dtype)
    # Cache the mean: total_count broadcast against the class probabilities.
    self._mean_val = self._total_count[..., tf.newaxis] * self._probs
  super(Multinomial, self).__init__(
      dtype=dtype,
      reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      parameters=parameters,
      graph_parents=[self._total_count, self._logits, self._probs],
      name=scope)
def __init__(self,
             total_count,
             logits=None,
             probs=None,
             validate_args=False,
             allow_nan_stats=True,
             name="NegativeBinomial"):
  """Construct NegativeBinomial distributions.

  Args:
    total_count: Floating-point `Tensor` with shape broadcastable to
      `[B1,..., Bb]` with `b >= 0` and the same dtype as `probs` or
      `logits`. Defines this as a batch of `B1 x ... x Bb` different
      Negative Binomial distributions. In practice, this represents the
      number of negative Bernoulli trials to stop at (the `total_count` of
      failures), but this is still a valid distribution when `total_count`
      is a non-integer. Asserted to be strictly positive when
      `validate_args` is `True`.
    logits: Floating-point `Tensor` with shape broadcastable to
      `[B1, ..., Bb]` where `b >= 0` indicates the number of batch
      dimensions. Each entry represents logits for the probability of
      success for independent Negative Binomial distributions and must be in
      the open interval `(-inf, inf)`. Only one of `logits` or `probs`
      should be specified.
    probs: Positive floating-point `Tensor` with shape broadcastable to
      `[B1, ..., Bb]` where `b >= 0` indicates the number of batch
      dimensions. Each entry represents the probability of success for
      independent Negative Binomial distributions and must be in the open
      interval `(0, 1)`. Only one of `logits` or `probs` should be
      specified.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.
  """
  # Must stay the first statement so only the constructor arguments are
  # captured.
  parameters = dict(locals())
  with tf.name_scope(name, values=[total_count, logits, probs]) as scope:
    dtype = dtype_util.common_dtype(
        [total_count, logits, probs], preferred_dtype=tf.float32)
    self._logits, self._probs = distribution_util.get_logits_and_probs(
        logits=logits,
        probs=probs,
        validate_args=validate_args,
        name=scope,
        dtype=dtype)
    count = tf.convert_to_tensor(
        total_count, name="total_count", dtype=dtype)
    checks = [tf.assert_positive(count)] if validate_args else []
    with tf.control_dependencies(checks):
      self._total_count = tf.identity(count, name="total_count")
  super(NegativeBinomial, self).__init__(
      dtype=self._probs.dtype,
      reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      parameters=parameters,
      graph_parents=[self._total_count, self._probs, self._logits],
      name=scope)
def true_mean_confidence_interval_by_dkwm(
    samples, low, high, error_rate=1e-6, name=None):
  """Computes a confidence interval for the mean of a scalar distribution.

  In batch mode, computes confidence intervals for all distributions
  in the batch (which need not be identically distributed).

  Relies on the [Dvoretzky-Kiefer-Wolfowitz-Massart inequality]
  (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval).

  The probability (over the randomness of drawing the given samples)
  that any true mean is outside the corresponding returned interval
  is no more than the given `error_rate`. The size of the intervals
  scale as `O(1 / sqrt(#samples))`, as `O(high - low)`, and as
  `O(-log(error_rate))`.

  Note that `error_rate` is a total error rate for all the confidence
  intervals in the batch. As such, if the batch is nontrivial, the
  error rate is not broadcast but divided (evenly) among the batch
  members.

  Args:
    samples: Floating-point `Tensor` of samples from the distribution(s)
      of interest. Entries are assumed IID across the 0th dimension.
      The other dimensions must broadcast with `low` and `high`.
      The support is bounded: `low <= samples <= high`.
    low: Floating-point `Tensor` of lower bounds on the distributions'
      supports.
    high: Floating-point `Tensor` of upper bounds on the distributions'
      supports.
    error_rate: *Scalar* floating-point `Tensor` admissible total rate
      of mistakes.
    name: A name for this operation (optional).

  Returns:
    low: A floating-point `Tensor` of stochastic lower bounds on the
      true means.
    high: A floating-point `Tensor` of stochastic upper bounds on the
      true means.
  """
  with tf.name_scope(name, "true_mean_confidence_interval_by_dkwm",
                     [samples, low, high, error_rate]):
    dtype = dtype_util.common_dtype(
        [samples, low, high, error_rate], tf.float32)
    samples = tf.convert_to_tensor(samples, name="samples", dtype=dtype)
    low = tf.convert_to_tensor(low, name="low", dtype=dtype)
    high = tf.convert_to_tensor(high, name="high", dtype=dtype)
    error_rate = tf.convert_to_tensor(
        error_rate, name="error_rate", dtype=dtype)

    bounds = [low, high]
    samples = _check_shape_dominates(samples, bounds)
    tf.assert_scalar(error_rate)  # Static shape
    # Split the total error budget among the batch members.
    per_item_rate = _itemwise_error_rate(error_rate, bounds, samples)
    num_draws = tf.shape(samples)[0]
    cdf_envelope = _dkwm_cdf_envelope(num_draws, per_item_rate)
    lower_bound = _minimum_mean(samples, cdf_envelope, low)
    upper_bound = _maximum_mean(samples, cdf_envelope, high)
    return lower_bound, upper_bound
def __init__(self,
             df,
             loc,
             scale,
             validate_args=False,
             allow_nan_stats=True,
             name="StudentT"):
  """Construct Student's t distributions.

  The distributions have degree of freedom `df`, mean `loc`, and scale
  `scale`.

  The parameters `df`, `loc`, and `scale` must be shaped in a way that
  supports broadcasting (e.g. `df + loc + scale` is a valid operation).

  Args:
    df: Floating-point `Tensor`. The degrees of freedom of the
      distribution(s). `df` must contain only positive values.
    loc: Floating-point `Tensor`. The mean(s) of the distribution(s).
    scale: Floating-point `Tensor`. The scaling factor(s) for the
      distribution(s). Note that `scale` is not technically the standard
      deviation of this distribution but has semantics more similar to
      standard deviation than variance.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`,
      statistics (e.g., mean, mode, variance) use the value "`NaN`" to
      indicate the result is undefined. When `False`, an exception is raised
      if one or more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    TypeError: if loc and scale are different dtypes.
  """
  # Must stay the first statement so only the constructor arguments are
  # captured.
  parameters = dict(locals())
  with tf.name_scope(name, values=[df, loc, scale]) as scope:
    dtype = dtype_util.common_dtype([df, loc, scale], tf.float32)
    df_t = tf.convert_to_tensor(df, name="df", dtype=dtype)
    loc_t = tf.convert_to_tensor(loc, name="loc", dtype=dtype)
    scale_t = tf.convert_to_tensor(scale, name="scale", dtype=dtype)
    # Only `df` is checked for positivity, and only on request.
    checks = [tf.assert_positive(df_t)] if validate_args else []
    with tf.control_dependencies(checks):
      self._df = tf.identity(df_t)
      self._loc = tf.identity(loc_t)
      self._scale = tf.identity(scale_t)
      tf.assert_same_float_dtype(
          (self._df, self._loc, self._scale))
  super(StudentT, self).__init__(
      dtype=self._scale.dtype,
      reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      parameters=parameters,
      graph_parents=[self._df, self._loc, self._scale],
      name=scope)
def __init__(self,
             concentration,
             rate,
             validate_args=False,
             allow_nan_stats=True,
             name="InverseGamma"):
  """Construct InverseGamma with `concentration` and `rate` parameters.

  The parameters `concentration` and `rate` must be shaped in a way that
  supports broadcasting (e.g. `concentration + rate` is a valid operation).

  Args:
    concentration: Floating point tensor, the concentration params of the
      distribution(s). Must contain only positive values.
    rate: Floating point tensor, the inverse scale params of the
      distribution(s). Must contain only positive values.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    TypeError: if `concentration` and `rate` are different dtypes.
  """
  # Must stay the first statement so only the constructor arguments are
  # captured.
  parameters = dict(locals())
  with tf.name_scope(name, values=[concentration, rate]) as scope:
    dtype = dtype_util.common_dtype(
        [concentration, rate], preferred_dtype=tf.float32)
    concentration_t = tf.convert_to_tensor(
        concentration, name="concentration", dtype=dtype)
    rate_t = tf.convert_to_tensor(rate, name="rate", dtype=dtype)
    if validate_args:
      checks = [
          tf.assert_positive(
              concentration_t, message="Concentration must be positive."),
          tf.assert_positive(
              rate_t, message="Rate must be positive."),
      ]
    else:
      checks = []
    with tf.control_dependencies(checks):
      self._concentration = tf.identity(concentration_t, name="concentration")
      self._rate = tf.identity(rate_t, name="rate")
      tf.assert_same_float_dtype([self._concentration, self._rate])
  super(InverseGamma, self).__init__(
      dtype=self._concentration.dtype,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
      parameters=parameters,
      graph_parents=[self._concentration, self._rate],
      name=scope)
def __init__(self,
             temperature,
             logits=None,
             probs=None,
             validate_args=False,
             allow_nan_stats=True,
             name="RelaxedBernoulli"):
  """Construct RelaxedBernoulli distributions.

  The distribution is built as a `Logistic` base distribution with location
  `logits / temperature` and scale `1 / temperature`, pushed through a
  `Sigmoid` bijector.

  Args:
    temperature: An 0-D `Tensor`, representing the temperature of a set of
      RelaxedBernoulli distributions. The temperature should be positive.
    logits: An N-D `Tensor` representing the log-odds of a positive event.
      Each entry in the `Tensor` parametrizes an independent
      RelaxedBernoulli distribution where the probability of an event is
      sigmoid(logits). Only one of `logits` or `probs` should be passed in.
    probs: An N-D `Tensor` representing the probability of a positive event.
      Each entry in the `Tensor` parameterizes an independent Bernoulli
      distribution. Only one of `logits` or `probs` should be passed in.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    ValueError: If both `probs` and `logits` are passed, or if neither.
  """
  # Must stay the first statement so only the constructor arguments are
  # captured.
  parameters = dict(locals())
  with tf.name_scope(name, values=[logits, probs, temperature]) as scope:
    dtype = dtype_util.common_dtype([logits, probs, temperature], tf.float32)
    temperature_t = tf.convert_to_tensor(
        temperature, name="temperature", dtype=dtype)
    if validate_args:
      with tf.control_dependencies([tf.assert_positive(temperature)]):
        temperature_t = tf.identity(temperature_t)
    self._temperature = temperature_t
    self._logits, self._probs = distribution_util.get_logits_and_probs(
        logits=logits,
        probs=probs,
        validate_args=validate_args,
        dtype=dtype)
    base_distribution = logistic.Logistic(
        self._logits / self._temperature,
        1. / self._temperature,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        name=scope + "/Logistic")
    squash = sigmoid_bijector.Sigmoid(validate_args=validate_args)
    super(RelaxedBernoulli, self).__init__(
        distribution=base_distribution,
        bijector=squash,
        validate_args=validate_args,
        name=scope)
  # Set after the parent constructor has run so it reflects this class's own
  # constructor arguments.
  self._parameters = parameters
def __init__(self,
             loc,
             scale,
             validate_args=False,
             allow_nan_stats=True,
             name="Gumbel"):
  """Construct Gumbel distributions with location and scale `loc` and `scale`.

  The parameters `loc` and `scale` must be shaped in a way that supports
  broadcasting (e.g. `loc + scale` is a valid operation).

  Args:
    loc: Floating point tensor, the means of the distribution(s).
    scale: Floating point tensor, the scales of the distribution(s).
      scale must contain only positive values.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
      Default value: `False`.
    allow_nan_stats: Python `bool`, default `True`. When `True`,
      statistics (e.g., mean, mode, variance) use the value "`NaN`" to
      indicate the result is undefined. When `False`, an exception is raised
      if one or more of the statistic's batch members are undefined.
      Default value: `True`.
    name: Python `str` name prefixed to Ops created by this class.
      Default value: `'Gumbel'`.

  Raises:
    TypeError: if loc and scale are different dtypes.
  """
  with tf.name_scope(name, values=[loc, scale]) as scope:
    dtype = dtype_util.common_dtype([loc, scale], preferred_dtype=tf.float32)
    loc_t = tf.convert_to_tensor(loc, name="loc", dtype=dtype)
    scale_t = tf.convert_to_tensor(scale, name="scale", dtype=dtype)
    checks = [tf.assert_positive(scale_t)] if validate_args else []
    with tf.control_dependencies(checks):
      loc_t = tf.identity(loc_t, name="loc")
      scale_t = tf.identity(scale_t, name="scale")
      tf.assert_same_float_dtype([loc_t, scale_t])
      self._gumbel_bijector = gumbel_bijector.Gumbel(
          loc=loc_t, scale=scale_t, validate_args=validate_args)

    # Base distribution: a scalar Uniform(0, 1) in the parameters' dtype.
    standard_uniform = uniform.Uniform(
        low=tf.zeros([], dtype=loc_t.dtype),
        high=tf.ones([], dtype=loc_t.dtype),
        allow_nan_stats=allow_nan_stats)
    # The Gumbel bijector encodes the quantile function as the forward,
    # and hence needs to be inverted.
    inverted_bijector = invert_bijector.Invert(self._gumbel_bijector)
    super(Gumbel, self).__init__(
        distribution=standard_uniform,
        bijector=inverted_bijector,
        batch_shape=distribution_util.get_broadcast_shape(loc_t, scale_t),
        name=scope)
def __init__(
    self,
    amplitude=None,
    length_scale=None,
    scale_mixture_rate=None,
    feature_ndims=1,
    validate_args=False,
    name="RationalQuadratic"):
  """Construct a RationalQuadratic kernel instance.

  Args:
    amplitude: Positive floating point `Tensor` that controls the maximum
      value of the kernel. Must be broadcastable with `length_scale` and
      `scale_mixture_rate` and inputs to `apply` and `matrix` methods.
    length_scale: Positive floating point `Tensor` that controls how sharp
      or wide the kernel shape is. This provides a characteristic "unit" of
      length against which `||x - y||` can be compared for scale. Must be
      broadcastable with `amplitude`, `scale_mixture_rate` and inputs to
      `apply` and `matrix` methods.
    scale_mixture_rate: Positive floating point `Tensor` that controls how
      the ExponentiatedQuadratic kernels are mixed. Must be broadcastable
      with `amplitude`, `length_scale` and inputs to `apply` and `matrix`
      methods.
    feature_ndims: Python `int` number of rightmost dims to include in the
      squared difference norm in the exponential.
    validate_args: If `True`, parameters are checked for validity despite
      possibly degrading runtime performance
    name: Python `str` name prefixed to Ops created by this class.
  """
  with tf.name_scope(
      name, values=[amplitude, scale_mixture_rate, length_scale]) as scope:
    dtype = dtype_util.common_dtype(
        [amplitude, scale_mixture_rate, length_scale], tf.float32)

    def _prepare(value, label):
      """Converts a provided parameter to a tensor, then validates it."""
      if value is not None:
        value = tf.convert_to_tensor(value, name=label, dtype=dtype)
      return _validate_arg_if_not_none(
          value, tf.assert_positive, validate_args)

    self._amplitude = _prepare(amplitude, "amplitude")
    self._length_scale = _prepare(length_scale, "length_scale")
    self._scale_mixture_rate = _prepare(
        scale_mixture_rate, "scale_mixture_rate")
    tf.assert_same_float_dtype([
        self._amplitude, self._length_scale, self._scale_mixture_rate])
  super(RationalQuadratic, self).__init__(
      feature_ndims, dtype=dtype, name=scope)
def __init__(self,
             dimension,
             concentration,
             validate_args=False,
             allow_nan_stats=True,
             name='LKJ'):
  """Construct LKJ distributions.

  Args:
    dimension: Python `int`. The dimension of the correlation matrices
      to sample.
    concentration: `float` or `double` `Tensor`. The positive concentration
      parameter of the LKJ distributions. The pdf of a sample matrix `X` is
      proportional to `det(X) ** (concentration - 1)`.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value `NaN` to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    ValueError: If `dimension` is negative.
  """
  if dimension < 0:
    raise ValueError(
        'There are no negative-dimension correlation matrices.')
  # Captured before any other local is bound so only the constructor
  # arguments are recorded.
  parameters = dict(locals())
  with tf.name_scope(name, values=[dimension, concentration]):
    dtype = dtype_util.common_dtype(
        [concentration], preferred_dtype=tf.float32)
    concentration_t = tf.convert_to_tensor(
        concentration, name='concentration', dtype=dtype)
    if validate_args:
      # concentration >= 1
      # TODO(b/111451422) Generalize to concentration > 0.
      checks = [tf.assert_non_negative(concentration_t - 1.)]
    else:
      checks = []
    with tf.control_dependencies(checks):
      self._dimension = dimension
      self._concentration = tf.identity(concentration_t, name='concentration')
  super(LKJ, self).__init__(
      dtype=self._concentration.dtype,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
      parameters=parameters,
      graph_parents=[self._concentration],
      name=name)
def __init__(self,
             concentration1=None,
             concentration0=None,
             validate_args=False,
             allow_nan_stats=True,
             name="Beta"):
  """Initialize a batch of Beta distributions.

  Args:
    concentration1: Positive floating-point `Tensor` indicating mean
      number of successes; aka "alpha". Implies `self.dtype` and
      `self.batch_shape`, i.e.,
      `concentration1.shape = [N1, N2, ..., Nm] = self.batch_shape`.
    concentration0: Positive floating-point `Tensor` indicating mean
      number of failures; aka "beta". Otherwise has same semantics as
      `concentration1`.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.
  """
  # Must stay the first statement so only the constructor arguments are
  # captured.
  parameters = dict(locals())
  with tf.name_scope(name, values=[concentration1, concentration0]) as scope:
    dtype = dtype_util.common_dtype(
        [concentration1, concentration0], tf.float32)
    alpha = tf.convert_to_tensor(
        concentration1, name="concentration1", dtype=dtype)
    self._concentration1 = self._maybe_assert_valid_concentration(
        alpha, validate_args)
    beta = tf.convert_to_tensor(
        concentration0, name="concentration0", dtype=dtype)
    self._concentration0 = self._maybe_assert_valid_concentration(
        beta, validate_args)
    tf.assert_same_float_dtype([self._concentration1, self._concentration0])
    # Cache alpha + beta alongside the two individual parameters.
    self._total_concentration = self._concentration1 + self._concentration0
  super(Beta, self).__init__(
      dtype=dtype,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
      parameters=parameters,
      graph_parents=[
          self._concentration1,
          self._concentration0,
          self._total_concentration,
      ],
      name=scope)
def __init__(self,
             amplitude=None,
             length_scale=None,
             period=None,
             feature_ndims=1,
             validate_args=False,
             name='ExpSinSquared'):
  """Construct a ExpSinSquared kernel instance.

  Args:
    amplitude: Positive floating point `Tensor` that controls the maximum
      value of the kernel. Must be broadcastable with `period`,
      `length_scale` and inputs to `apply` and `matrix` methods. A value of
      `None` is treated like 1.
    length_scale: Positive floating point `Tensor` that controls how sharp
      or wide the kernel shape is. This provides a characteristic "unit" of
      length against which `|x - y|` can be compared for scale. Must be
      broadcastable with `amplitude`, `period` and inputs to `apply` and
      `matrix` methods. A value of `None` is treated like 1.
    period: Positive floating point `Tensor` that controls the period of
      the kernel. Must be broadcastable with `amplitude`, `length_scale`
      and inputs to `apply` and `matrix` methods. A value of `None` is
      treated like 1.
    feature_ndims: Python `int` number of rightmost dims to include in
      kernel computation.
    validate_args: If `True`, parameters are checked for validity despite
      possibly degrading runtime performance
    name: Python `str` name prefixed to Ops created by this class.
  """
  with tf.name_scope(name, values=[amplitude, period, length_scale]) as scope:
    dtype = dtype_util.common_dtype(
        [amplitude, period, length_scale], tf.float32)

    def _prepare(value, label):
      """Converts a provided parameter to a tensor, then validates it."""
      if value is not None:
        value = tf.convert_to_tensor(value, name=label, dtype=dtype)
      return _validate_arg_if_not_none(
          value, tf.assert_positive, validate_args)

    self._amplitude = _prepare(amplitude, 'amplitude')
    self._period = _prepare(period, 'period')
    self._length_scale = _prepare(length_scale, 'length_scale')
    tf.assert_same_float_dtype(
        [self._amplitude, self._length_scale, self._period])
  super(ExpSinSquared, self).__init__(
      feature_ndims, dtype=dtype, name=scope)
def __init__(self,
             loc,
             scale,
             validate_args=False,
             allow_nan_stats=True,
             name="Laplace"):
  """Construct Laplace distribution with parameters `loc` and `scale`.

  The parameters `loc` and `scale` must be shaped in a way that supports
  broadcasting (e.g., `loc / scale` is a valid operation).

  Args:
    loc: Floating point tensor which characterizes the location (center)
      of the distribution.
    scale: Positive floating point tensor which characterizes the spread of
      the distribution.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`,
      statistics (e.g., mean, mode, variance) use the value "`NaN`" to
      indicate the result is undefined. When `False`, an exception is raised
      if one or more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    TypeError: if `loc` and `scale` are of different dtype.
  """
  # Must stay the first statement so only the constructor arguments are
  # captured.
  parameters = dict(locals())
  with tf.name_scope(name, values=[loc, scale]) as scope:
    dtype = dtype_util.common_dtype([loc, scale], tf.float32)
    loc_t = tf.convert_to_tensor(loc, name="loc", dtype=dtype)
    scale_t = tf.convert_to_tensor(scale, name="scale", dtype=dtype)
    checks = [tf.assert_positive(scale_t)] if validate_args else []
    with tf.control_dependencies(checks):
      self._loc = tf.identity(loc_t)
      self._scale = tf.identity(scale_t)
      tf.assert_same_float_dtype([self._loc, self._scale])
  super(Laplace, self).__init__(
      dtype=dtype,
      reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      parameters=parameters,
      graph_parents=[self._loc, self._scale],
      name=scope)
def __init__(self,
             low=0.,
             high=1.,
             validate_args=False,
             allow_nan_stats=True,
             name="Uniform"):
  """Initialize a batch of Uniform distributions.

  Args:
    low: Floating point tensor, lower boundary of the output interval. Must
      have `low < high`.
    high: Floating point tensor, upper boundary of the output interval. Must
      have `low < high`.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    InvalidArgumentError: if `low >= high` and `validate_args=True` (the
      `low < high` assertion is only added to the graph in that case).
  """
  # Must stay the first statement so only the constructor arguments are
  # captured.
  parameters = dict(locals())
  with tf.name_scope(name, values=[low, high]) as scope:
    dtype = dtype_util.common_dtype([low, high], tf.float32)
    low_t = tf.convert_to_tensor(low, name="low", dtype=dtype)
    high_t = tf.convert_to_tensor(high, name="high", dtype=dtype)
    if validate_args:
      checks = [
          tf.assert_less(
              low_t, high_t,
              message="uniform not defined when low >= high."),
      ]
    else:
      checks = []
    with tf.control_dependencies(checks):
      self._low = tf.identity(low_t)
      self._high = tf.identity(high_t)
      tf.assert_same_float_dtype([self._low, self._high])
  super(Uniform, self).__init__(
      dtype=self._low.dtype,
      reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      parameters=parameters,
      graph_parents=[self._low, self._high],
      name=scope)
def __init__(self,
             concentration,
             scale=1.,
             validate_args=False,
             allow_nan_stats=True,
             name="Pareto"):
  """Construct Pareto distribution with `concentration` and `scale`.

  Args:
    concentration: Floating point tensor. Must contain only positive values.
    scale: Floating point tensor, equivalent to `mode`. `scale` also
      restricts the domain of this distribution to be in `[scale, inf)`.
      Must contain only positive values. Default value: `1`.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs. Default value: `False` (i.e. do not validate args).
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
      Default value: `True`.
    name: Python `str` name prefixed to Ops created by this class.
      Default value: 'Pareto'.
  """
  # Must stay the first statement so only the constructor arguments are
  # captured.
  parameters = dict(locals())
  with tf.name_scope(name, values=[concentration, scale]):
    dtype = dtype_util.common_dtype([concentration, scale], tf.float32)
    concentration_t = tf.convert_to_tensor(
        concentration, name="concentration", dtype=dtype)
    scale_t = tf.convert_to_tensor(scale, name="scale", dtype=dtype)
    if validate_args:
      checks = [
          tf.assert_positive(concentration_t),
          tf.assert_positive(scale_t),
      ]
    else:
      checks = []
    with tf.control_dependencies(checks):
      self._concentration = tf.identity(
          concentration_t, name="concentration")
      self._scale = tf.identity(scale_t, name="scale")
  super(Pareto, self).__init__(
      dtype=self._concentration.dtype,
      reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      parameters=parameters,
      graph_parents=[self._concentration, self._scale],
      name=name)
def __init__(self,
             loc,
             scale,
             validate_args=False,
             allow_nan_stats=True,
             name="HalfCauchy"):
  """Construct a half-Cauchy distribution with `loc` and `scale`.

  Args:
    loc: Floating-point `Tensor`; the location(s) of the distribution(s).
    scale: Floating-point `Tensor`; the scale(s) of the distribution(s).
      Must contain only positive values.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
      Default value: `False` (i.e. do not validate args).
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
      Default value: `True`.
    name: Python `str` name prefixed to Ops created by this class.
      Default value: 'HalfCauchy'.

  Raises:
    TypeError: if `loc` and `scale` have different `dtype`.
  """
  # Must stay the first statement so that only the constructor arguments are
  # captured.
  parameters = dict(locals())
  with tf.name_scope(name, values=[loc, scale]) as name:
    dtype = dtype_util.common_dtype([loc, scale], preferred_dtype=tf.float32)
    loc = tf.convert_to_tensor(loc, name="loc", dtype=dtype)
    scale = tf.convert_to_tensor(scale, name="scale", dtype=dtype)
    # The positivity check is only attached when validation was requested.
    with tf.control_dependencies([tf.assert_positive(scale)]
                                 if validate_args else []):
      self._loc = tf.identity(loc, name="loc")
      # Label the scale tensor as "scale" (it was mislabelled "loc").
      self._scale = tf.identity(scale, name="scale")
    tf.assert_same_float_dtype([self._loc, self._scale])
  super(HalfCauchy, self).__init__(
      dtype=self._scale.dtype,
      reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      parameters=parameters,
      graph_parents=[self._loc, self._scale],
      name=name)
def _minimum_mean(samples, envelope, low, name=None):
  """Returns a stochastic lower bound on the mean of a scalar distribution.

  If the true CDF lies within an `eps`-envelope of the empirical CDF of the
  samples, and the support is bounded below, then the mean is bounded below
  too. In symbols,

  ```none
  sup_x(|F_n(x) - F(x)|) < eps
  ```

  The 0th dimension of `samples` is treated as independent and identically
  distributed draws. The remaining dimensions are broadcast together with
  `envelope` and `low`, and handled separately.

  Args:
    samples: Floating-point `Tensor` of samples from the distribution(s) of
      interest. Entries are assumed IID across the 0th dimension. The other
      dimensions must broadcast with `envelope` and `low`.
    envelope: Floating-point `Tensor` of sizes of admissible CDF envelopes
      (i.e., the `eps` above).
    low: Floating-point `Tensor` of lower bounds on the distributions'
      supports. `samples >= low`.
    name: A name for this operation (optional).

  Returns:
    bound: Floating-point `Tensor` of lower bounds on the true means.

  Raises:
    InvalidArgumentError: If some `sample` is found to be smaller than the
      corresponding `low`.
  """
  with tf.name_scope(name, "minimum_mean", [samples, envelope, low]):
    common = dtype_util.common_dtype([samples, envelope, low], tf.float32)
    samples = tf.convert_to_tensor(samples, name="samples", dtype=common)
    envelope = tf.convert_to_tensor(envelope, name="envelope", dtype=common)
    low = tf.convert_to_tensor(low, name="low", dtype=common)
    smallest_sample = tf.reduce_min(samples, axis=[0])
    support_check = tf.assert_greater_equal(
        smallest_sample,
        low,
        message="Given sample minimum value falls below expectations")
    with tf.control_dependencies([support_check]):
      # Negate the inputs, reuse the shared maximum-mean routine, and negate
      # the result back.
      mirrored_bound = _do_maximum_mean(-samples, envelope, -low)
      return -mirrored_bound
def __init__(self,
             concentration,
             validate_args=False,
             allow_nan_stats=True,
             name="Dirichlet"):
  """Set up a batch of Dirichlet distributions.

  Args:
    concentration: Positive floating-point `Tensor` indicating mean number
      of class occurrences; aka "alpha". Implies `self.dtype`, and
      `self.batch_shape`, `self.event_shape`, i.e., if
      `concentration.shape = [N1, N2, ..., Nm, k]` then
      `batch_shape = [N1, N2, ..., Nm]` and `event_shape = [k]`.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.
  """
  # Snapshot of the constructor arguments; must run before any other local
  # is bound.
  parameters = dict(locals())
  with tf.name_scope(name, values=[concentration]) as name:
    inferred_dtype = dtype_util.common_dtype(
        [concentration], preferred_dtype=tf.float32)
    concentration_tensor = tf.convert_to_tensor(
        concentration, name="concentration", dtype=inferred_dtype)
    self._concentration = self._maybe_assert_valid_concentration(
        concentration_tensor, validate_args)
    # Sum over the last (event) axis.
    self._total_concentration = tf.reduce_sum(self._concentration, -1)
  super(Dirichlet, self).__init__(
      dtype=self._concentration.dtype,
      reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      parameters=parameters,
      graph_parents=[self._concentration, self._total_concentration],
      name=name)
def __init__(self,
             df,
             validate_args=False,
             allow_nan_stats=True,
             name="Chi2"):
  """Build Chi2 distributions from the degrees-of-freedom parameter `df`.

  Args:
    df: Floating point tensor, the degrees of freedom of the
      distribution(s). `df` must contain only positive values.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.
  """
  # Snapshot of the constructor arguments; must run before any other local
  # is bound.
  parameters = dict(locals())
  # All chi2 statistics are defined for valid parameters, but that does not
  # hold for the parent class "gamma"; passing allow_nan_stats=True through
  # to the parent therefore results in unnecessary asserts.
  with tf.name_scope(name, values=[df]) as name:
    df_dtype = dtype_util.common_dtype([df], preferred_dtype=tf.float32)
    df = tf.convert_to_tensor(df, name="df", dtype=df_dtype)
    positivity_checks = [tf.assert_positive(df)] if validate_args else []
    with tf.control_dependencies(positivity_checks):
      self._df = tf.identity(df, name="df")
    # The parent is parameterized with concentration = df / 2 and rate = 1/2.
    super(Chi2, self).__init__(
        concentration=0.5 * self._df,
        rate=0.5,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        name=name)
  # Replace the parent's record with this constructor's own arguments.
  self._parameters = parameters
def __init__(self,
             concentration,
             rate,
             validate_args=False,
             allow_nan_stats=True,
             name="InverseGammaWithSoftplusConcentrationRate"):
  """Construct the parent distribution from softplus-transformed parameters.

  Both `concentration` and `rate` are converted to tensors of a common dtype
  and passed through `tf.nn.softplus` before being handed to the parent
  constructor.

  Args:
    concentration: Tensor-convertible value; softplus of it is used as the
      parent's `concentration`.
    rate: Tensor-convertible value; softplus of it is used as the parent's
      `rate`.
    validate_args: Forwarded unchanged to the parent constructor.
    allow_nan_stats: Forwarded unchanged to the parent constructor.
    name: Python `str` name prefixed to Ops created by this class.
  """
  # Snapshot of the constructor arguments; must run before any other local
  # is bound.
  parameters = dict(locals())
  with tf.name_scope(name, values=[concentration, rate]) as name:
    shared_dtype = dtype_util.common_dtype([concentration, rate])
    concentration = tf.convert_to_tensor(
        concentration, name="softplus_concentration", dtype=shared_dtype)
    rate = tf.convert_to_tensor(
        rate, name="softplus_rate", dtype=shared_dtype)
    transformed_concentration = tf.nn.softplus(
        concentration, name="softplus_concentration")
    transformed_rate = tf.nn.softplus(rate, name="softplus_rate")
    super(InverseGammaWithSoftplusConcentrationRate, self).__init__(
        concentration=transformed_concentration,
        rate=transformed_rate,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        name=name)
  # Record the untransformed arguments given to this constructor.
  self._parameters = parameters
def __init__(self,
             rate,
             validate_args=False,
             allow_nan_stats=True,
             name="Exponential"):
  """Build an Exponential distribution from its `rate` parameter.

  Args:
    rate: Floating point tensor, equivalent to `1 / mean`. Must contain only
      positive values.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.
  """
  # Snapshot of the constructor arguments; must run before any other local
  # is bound.
  parameters = dict(locals())
  # All statistics are defined for valid inputs here, but that does not hold
  # for the parent class "Gamma"; passing allow_nan_stats=True through to
  # the parent therefore results in unnecessary asserts.
  with tf.name_scope(name, values=[rate]) as name:
    rate_dtype = dtype_util.common_dtype([rate], preferred_dtype=tf.float32)
    self._rate = tf.convert_to_tensor(rate, name="rate", dtype=rate_dtype)
  # The parent is parameterized with a fixed concentration of 1.
  super(Exponential, self).__init__(
      concentration=1.,
      rate=self._rate,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      name=name)
  self._parameters = parameters
  self._graph_parents += [self._rate]
def __init__(self,
             scale,
             validate_args=False,
             allow_nan_stats=True,
             name="HalfNormal"):
  """Build HalfNormal distributions with the given `scale`.

  Args:
    scale: Floating point tensor; the scales of the distribution(s).
      Must contain only positive values.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.
  """
  # Snapshot of the constructor arguments; must run before any other local
  # is bound.
  parameters = dict(locals())
  with tf.name_scope(name, values=[scale]) as name:
    scale_dtype = dtype_util.common_dtype(
        [scale], preferred_dtype=tf.float32)
    scale = tf.convert_to_tensor(scale, name="scale", dtype=scale_dtype)
    # The positivity check is only attached when validation was requested.
    positivity_checks = [tf.assert_positive(scale)] if validate_args else []
    with tf.control_dependencies(positivity_checks):
      self._scale = tf.identity(scale, name="scale")
  super(HalfNormal, self).__init__(
      dtype=self._scale.dtype,
      reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      parameters=parameters,
      graph_parents=[self._scale],
      name=name)
def __init__(self,
             loc=None,
             scale=None,
             validate_args=False,
             allow_nan_stats=True,
             name="LogNormal"):
  """Construct a log-normal distribution.

  The LogNormal distribution models positive-valued random variables whose
  logarithm is normally distributed with mean `loc` and standard deviation
  `scale`. It is constructed as the exponential transformation of a Normal
  distribution.

  Args:
    loc: Floating-point `Tensor`; the means of the underlying
      Normal distribution(s).
    scale: Floating-point `Tensor`; the stddevs of the underlying
      Normal distribution(s).
    validate_args: Python `bool`, default `False`. Whether to validate input
      with asserts. If `validate_args` is `False`, and the inputs are
      invalid, correct behavior is not guaranteed.
    allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
      exception if a statistic (e.g. mean/mode/etc...) is undefined for any
      batch member. If `True`, batch members with valid parameters leading
      to undefined statistics will return NaN for this statistic.
    name: The name to give Ops created by the initializer.
  """
  with tf.name_scope(name, values=[loc, scale]) as name:
    dtype = dtype_util.common_dtype([loc, scale], tf.float32)
    # Both flags are handed to the underlying Normal as well: previously
    # `allow_nan_stats` was dropped entirely and `validate_args` never
    # reached the distribution that actually owns `loc` and `scale`.
    base_normal = normal.Normal(
        loc=tf.convert_to_tensor(loc, name="loc", dtype=dtype),
        scale=tf.convert_to_tensor(scale, name="scale", dtype=dtype),
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats)
    super(LogNormal, self).__init__(
        distribution=base_normal,
        bijector=exp_bijector.Exp(),
        validate_args=validate_args,
        name=name)
def _init_params(self, amplitude, length_scale, validate_args):
  """Converts, validates and stores the `amplitude`/`length_scale` params.

  Args:
    amplitude: `Tensor` (or convertible) or `None` to convert, validate.
    length_scale: `Tensor` (or convertible) or `None` to convert, validate.
    validate_args: If `True`, parameters are checked for validity despite
      possibly degrading runtime performance.

  Returns:
    dtype: The common `DType` of the parameters.
  """
  dtype = dtype_util.common_dtype([amplitude, length_scale], tf.float32)

  # `None` is passed through untouched; anything else becomes a tensor of
  # the common dtype before validation.
  amplitude_tensor = (
      None if amplitude is None else
      tf.convert_to_tensor(amplitude, name='amplitude', dtype=dtype))
  self._amplitude = _validate_arg_if_not_none(
      amplitude_tensor, tf.assert_positive, validate_args)

  length_scale_tensor = (
      None if length_scale is None else
      tf.convert_to_tensor(length_scale, name='length_scale', dtype=dtype))
  self._length_scale = _validate_arg_if_not_none(
      length_scale_tensor, tf.assert_positive, validate_args)

  tf.assert_same_float_dtype([self.amplitude, self.length_scale])
  return dtype
def __init__(self,
             loc,
             scale,
             concentration,
             rate,
             validate_args=False,
             allow_nan_stats=True,
             name="NormalGamma"):
  """Initializes a batch of Normal-Gamma distributions.

  Args:
    loc: Floating point tensor; the means of the distribution(s).
    scale: Floating point tensor; the stddevs of the distribution(s).
      Must contain only positive values.
      (`scale = sqrt(lambda)` according to the wikipedia article)
    concentration: Floating point tensor, the concentration params of the
      distribution(s). Must contain only positive values.
    rate: Floating point tensor, the inverse scale params of the
      distribution(s). Must contain only positive values.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    TypeError: if `concentration` and `rate` are different dtypes.
  """
  # Must stay the first statement so that only the constructor arguments are
  # captured.
  parameters = dict(locals())
  # Rebind `name` to the scope that was actually opened, so the parent
  # constructor is handed that scope rather than the raw user string.
  with tf.name_scope(
      name, values=[loc, scale, concentration, rate]) as name:
    dtype = dtype_util.common_dtype([loc, scale, concentration, rate],
                                    dtype_hint=tf.float32)
    loc = tf.convert_to_tensor(loc, name="loc", dtype=dtype)
    scale = tf.convert_to_tensor(scale, name="scale", dtype=dtype)
    concentration = tf.convert_to_tensor(
        concentration, name="concentration", dtype=dtype)
    rate = tf.convert_to_tensor(rate, name="rate", dtype=dtype)
    # Positivity checks are only attached when validation was requested.
    with tf.control_dependencies([
        tf.assert_positive(scale),
        tf.assert_positive(concentration),
        tf.assert_positive(rate),
    ] if validate_args else []):
      self._loc = tf.identity(loc)
      self._scale = tf.identity(scale)
      self._concentration = tf.identity(concentration)
      self._rate = tf.identity(rate)
    tf.assert_same_float_dtype(
        [self._loc, self._scale, self._concentration, self._rate])
    # the coefficient for the precision
    self._lambda = tf.square(self._scale)
  super(NormalGamma, self).__init__(
      dtype=self._loc.dtype,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
      parameters=parameters,
      graph_parents=[
          self._loc, self._scale, self._concentration, self._rate
      ],
      name=name)
def __init__(self,
             loc,
             scale,
             quadrature_size=8,
             quadrature_fn=quadrature_scheme_lognormal_quantiles,
             validate_args=False,
             allow_nan_stats=True,
             name="PoissonLogNormalQuadratureCompound"):
  """Constructs the `PoissonLogNormalQuadratureCompound`.

  Note: `probs` returned by (optional) `quadrature_fn` are presumed to be
  either a length-`quadrature_size` vector or a batch of vectors in 1-to-1
  correspondence with the returned `grid`. (I.e., broadcasting is only
  partially supported.)

  Args:
    loc: `float`-like (batch of) scalar `Tensor`; the location parameter of
      the LogNormal prior.
    scale: `float`-like (batch of) scalar `Tensor`; the scale parameter of
      the LogNormal prior.
    quadrature_size: Python `int` scalar representing the number of
      quadrature points.
    quadrature_fn: Python callable taking `loc`, `scale`,
      `quadrature_size`, `validate_args` and returning `tuple(grid, probs)`
      representing the LogNormal grid and corresponding normalized weight.
      Default value: `quadrature_scheme_lognormal_quantiles`.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`,
      statistics (e.g., mean, mode, variance) use the value "`NaN`" to
      indicate the result is undefined. When `False`, an exception is raised
      if one or more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    TypeError: if `quadrature_grid` and `quadrature_probs` have different
      base `dtype`.
  """
  # Must stay the first statement so that only the constructor arguments are
  # captured.
  parameters = dict(locals())
  with tf.name_scope(name, values=[loc, scale]) as name:
    dtype = dtype_util.common_dtype([loc, scale], tf.float32)
    # `None` is tolerated for either parameter and handed to `quadrature_fn`
    # unchanged.
    if loc is not None:
      loc = tf.convert_to_tensor(loc, name="loc", dtype=dtype)
    if scale is not None:
      scale = tf.convert_to_tensor(scale, dtype=dtype, name="scale")
    self._quadrature_grid, self._quadrature_probs = tuple(quadrature_fn(
        loc, scale, quadrature_size, validate_args))

    dt = self._quadrature_grid.dtype
    if dt.base_dtype != self._quadrature_probs.dtype.base_dtype:
      raise TypeError("Quadrature grid dtype ({}) does not match quadrature "
                      "probs dtype ({}).".format(
                          dt.name, self._quadrature_probs.dtype.name))

    # The quadrature grid is used as the log-rate of the Poisson components.
    self._distribution = poisson.Poisson(
        log_rate=self._quadrature_grid,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats)

    self._mixture_distribution = categorical.Categorical(
        logits=tf.log(self._quadrature_probs),
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats)

  self._loc = loc
  self._scale = scale
  self._quadrature_size = quadrature_size

  # Only real tensors are reported as graph parents; a `None` parameter
  # (permitted above) is left out instead of being passed along.
  graph_parents = [t for t in (loc, scale) if t is not None]
  super(PoissonLogNormalQuadratureCompound, self).__init__(
      dtype=dt,
      reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      parameters=parameters,
      graph_parents=graph_parents,
      name=name)
def testCommonDtypeFromEdRV(self):
  """`common_dtype` reports float64 for a float64-parameterized `ed` RV."""
  # As in tensorflow_probability github issue #221
  float64_ones = np.ones(3, dtype='float64')
  random_variable = ed.Dirichlet(float64_ones)
  inferred_dtype = dtype_util.common_dtype(
      [random_variable], preferred_dtype=tf.float32)
  self.assertEqual(tf.float64, inferred_dtype)
def __init__(self,
             mean_direction,
             concentration,
             validate_args=False,
             allow_nan_stats=True,
             name='VonMisesFisher'):
  """Builds a new `VonMisesFisher` instance.

  Args:
    mean_direction: Floating-point `Tensor` with shape [B1, ... Bn, D].
      A unit vector indicating the mode of the distribution, or the
      unit-normalized direction of the mean. (This is *not* in general the
      mean of the distribution; the mean is not generally in the support of
      the distribution.) NOTE: `D` is currently restricted to <= 5.
    concentration: Floating-point `Tensor` having batch shape [B1, ... Bn]
      broadcastable with `mean_direction`. The level of concentration of
      samples around the `mean_direction`. `concentration=0` indicates a
      uniform distribution over the unit hypersphere, and
      `concentration=+inf` indicates a `Deterministic` distribution (delta
      function) at `mean_direction`.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`,
      statistics (e.g., mean, mode, variance) use the value "`NaN`" to
      indicate the result is undefined. When `False`, an exception is raised
      if one or more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    ValueError: For known-bad arguments, i.e. unsupported event dimension.
  """
  # Snapshot of the constructor arguments; must run before any other local
  # is bound.
  parameters = dict(locals())
  with tf.name_scope(name) as name:
    dtype = dtype_util.common_dtype([mean_direction, concentration],
                                    tf.float32)
    mean_direction = tf.convert_to_tensor(
        value=mean_direction, name='mean_direction', dtype=dtype)
    concentration = tf.convert_to_tensor(
        value=concentration, name='concentration', dtype=dtype)

    assertions = []
    if validate_args:
      assertions = [
          assert_util.assert_non_negative(
              concentration,
              message='`concentration` must be non-negative'),
          assert_util.assert_greater(
              tf.shape(input=mean_direction)[-1],
              1,
              message='`mean_direction` may not have scalar event shape'),
          assert_util.assert_near(
              1.,
              tf.linalg.norm(tensor=mean_direction, axis=-1),
              message='`mean_direction` must be unit-length'),
      ]

    event_shape = tensorshape_util.with_rank_at_least(
        mean_direction.shape, 1)
    static_event_dim = tf.compat.dimension_value(event_shape[-1])
    # Reject a statically known oversized event dimension immediately;
    # otherwise fall back to a runtime check when validation is requested.
    if static_event_dim is not None and static_event_dim > 5:
      raise ValueError('vMF ndims > 5 is not currently supported')
    elif validate_args:
      assertions.append(
          assert_util.assert_less_equal(
              tf.shape(input=mean_direction)[-1],
              5,
              message='vMF ndims > 5 is not currently supported'))

    with tf.control_dependencies(assertions):
      self._mean_direction = tf.identity(mean_direction)
      self._concentration = tf.identity(concentration)
    dtype_util.assert_same_float_dtype(
        [self._mean_direction, self._concentration])

    # mean_direction is always reparameterized.
    # concentration is only for event_dim==3, via an inversion sampler.
    if static_event_dim == 3:
      reparameterization_type = reparameterization.FULLY_REPARAMETERIZED
    else:
      reparameterization_type = reparameterization.NOT_REPARAMETERIZED
  super(VonMisesFisher, self).__init__(
      dtype=self._concentration.dtype,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      reparameterization_type=reparameterization_type,
      parameters=parameters,
      graph_parents=[self._mean_direction, self._concentration],
      name=name)
def __init__(self,
             power,
             dtype=tf.int32,
             interpolate_nondiscrete=True,
             sample_maximum_iterations=100,
             validate_args=False,
             allow_nan_stats=False,
             name="Zipf"):
  """Initialize a batch of Zipf distributions.

  Args:
    power: `Float` like `Tensor` representing the power parameter. Must be
      strictly greater than `1`.
    dtype: The `dtype` of `Tensor` returned by `sample`.
      Default value: `tf.int32`.
    interpolate_nondiscrete: Python `bool`. When `False`, `log_prob` returns
      `-inf` (and `prob` returns `0`) for non-integer inputs. When `True`,
      `log_prob` evaluates the continuous function `-power log(k) -
      log(zeta(power))` , which matches the Zipf pmf at integer arguments `k`
      (note that this function is not itself a normalized probability
      log-density).
      Default value: `True`.
    sample_maximum_iterations: Maximum number of iterations allowed in
      `sample`. When `validate_args=True`, samples which fail to reach
      convergence (subject to this cap) are masked out with
      `self.dtype.min` or `nan` depending on `self.dtype.is_integer`.
      Default value: `100`.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
      Default value: `False`.
    allow_nan_stats: Python `bool`, default `False`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
      Default value: `False`.
    name: Python `str` name prefixed to Ops created by this class.
      Default value: `'Zipf'`.

  Raises:
    TypeError: if `power` is not `float` like, or is `float16`.
  """
  # Must stay the first statement so that only the constructor arguments are
  # captured.
  parameters = dict(locals())
  with tf.name_scope(name) as name:
    power = tf.convert_to_tensor(
        power,
        name="power",
        dtype=dtype_util.common_dtype([power], dtype_hint=tf.float32))
    # Non-floating dtypes and float16 are both rejected up front.
    if (not dtype_util.is_floating(power.dtype) or
        dtype_util.base_equal(power.dtype, tf.float16)):
      raise TypeError(
          "power.dtype ({}) is not a supported `float` type.".format(
              dtype_util.name(power.dtype)))
    runtime_assertions = []
    if validate_args:
      # `power` must be strictly greater than one.
      runtime_assertions.append(
          assert_util.assert_greater(
              power, np.ones([], power.dtype.as_numpy_dtype)))
    with tf.control_dependencies(runtime_assertions):
      self._power = tf.identity(power, name="power")
  self._interpolate_nondiscrete = interpolate_nondiscrete
  self._sample_maximum_iterations = sample_maximum_iterations
  super(Zipf, self).__init__(
      dtype=dtype,
      reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      parameters=parameters,
      graph_parents=[self._power],
      name=name)
def __init__(self,
             level_scale_prior=None,
             slope_scale_prior=None,
             initial_level_prior=None,
             initial_slope_prior=None,
             observed_time_series=None,
             name=None):
  """Specify a local linear trend model.

  Args:
    level_scale_prior: optional `tfd.Distribution` instance specifying a
      prior on the `level_scale` parameter. If `None`, a heuristic default
      prior is constructed based on the provided `observed_time_series`.
      Default value: `None`.
    slope_scale_prior: optional `tfd.Distribution` instance specifying a
      prior on the `slope_scale` parameter. If `None`, a heuristic default
      prior is constructed based on the provided `observed_time_series`.
      Default value: `None`.
    initial_level_prior: optional `tfd.Distribution` instance specifying a
      prior on the initial level. If `None`, a heuristic default prior is
      constructed based on the provided `observed_time_series`.
      Default value: `None`.
    initial_slope_prior: optional `tfd.Distribution` instance specifying a
      prior on the initial slope. If `None`, a heuristic default prior is
      constructed based on the provided `observed_time_series`.
      Default value: `None`.
    observed_time_series: optional `float` `Tensor` of shape
      `batch_shape + [T, 1]` (omitting the trailing unit dimension is also
      supported when `T > 1`), specifying an observed time series. Any
      `NaN`s are interpreted as missing observations; missingness may be
      also be explicitly specified by passing a `tfp.sts.MaskedTimeSeries`
      instance. Any priors not explicitly set will be given default values
      according to the scale of the observed time series (or batch of time
      series).
      Default value: `None`.
    name: the name of this model component.
      Default value: 'LocalLinearTrend'.
  """
  # Snapshot of the constructor arguments; must run before any other local
  # is bound.
  init_parameters = dict(locals())
  with tf.name_scope(name or 'LocalLinearTrend') as name:
    # Without data, fall back to unit scale and zero initial value.
    if observed_time_series is None:
      observed_stddev, observed_initial = 1., 0.
    else:
      _, observed_stddev, observed_initial = (
          sts_util.empirical_statistics(observed_time_series))

    # Heuristic default priors. Overriding these may dramatically
    # change inference performance and results.
    if level_scale_prior is None:
      level_scale_prior = tfd.LogNormal(
          loc=tf.math.log(.05 * observed_stddev),
          scale=3.,
          name='level_scale_prior')
    if slope_scale_prior is None:
      slope_scale_prior = tfd.LogNormal(
          loc=tf.math.log(.05 * observed_stddev),
          scale=3.,
          name='slope_scale_prior')
    if initial_level_prior is None:
      initial_level_prior = tfd.Normal(
          loc=observed_initial,
          scale=tf.abs(observed_initial) + observed_stddev,
          name='initial_level_prior')
    if initial_slope_prior is None:
      initial_slope_prior = tfd.Normal(
          loc=0., scale=observed_stddev, name='initial_slope_prior')

    priors = [
        level_scale_prior,
        slope_scale_prior,
        initial_level_prior,
        initial_slope_prior,
    ]
    dtype = dtype_util.common_dtype(priors)

    # Stack the level and slope priors' moments into one 2-d diagonal prior.
    initial_means = tf.stack(
        [initial_level_prior.mean(), initial_slope_prior.mean()], axis=-1)
    initial_stddevs = tf.stack(
        [initial_level_prior.stddev(), initial_slope_prior.stddev()],
        axis=-1)
    self._initial_state_prior = tfd.MultivariateNormalDiag(
        loc=initial_means, scale_diag=initial_stddevs)

    scaled_softplus = tfb.Chain([
        tfb.Scale(scale=observed_stddev),
        tfb.Softplus(low=dtype_util.eps(dtype)),
    ])
    model_parameters = [
        Parameter('level_scale', level_scale_prior, scaled_softplus),
        Parameter('slope_scale', slope_scale_prior, scaled_softplus),
    ]
    super(LocalLinearTrend, self).__init__(
        parameters=model_parameters,
        latent_size=2,
        init_parameters=init_parameters,
        name=name)
def testCommonDtypeFromEdRV(self):
  """`common_dtype` reports float64 for a float64-parameterized `ed` RV."""
  # As in tensorflow_probability github issue #221
  float64_ones = np.ones(3, dtype='float64')
  random_variable = ed.Dirichlet(float64_ones)
  inferred_dtype = dtype_util.common_dtype(
      [random_variable], preferred_dtype=tf.float32)
  self.assertEqual(tf.float64, inferred_dtype)
def __init__(self,
             order,
             coefficients_prior=None,
             level_scale_prior=None,
             initial_state_prior=None,
             coefficient_constraining_bijector=None,
             observed_time_series=None,
             name=None):
  """Specify an autoregressive model.

  Args:
    order: scalar Python positive `int` specifying the number of past
      timesteps to regress on.
    coefficients_prior: optional `tfd.Distribution` instance specifying a
      prior on the `coefficients` parameter. If `None`, a default standard
      normal (`tfd.MultivariateNormalDiag(scale_diag=tf.ones([order]))`)
      prior is used.
      Default value: `None`.
    level_scale_prior: optional `tfd.Distribution` instance specifying a
      prior on the `level_scale` parameter. If `None`, a heuristic default
      prior is constructed based on the provided `observed_time_series`.
      Default value: `None`.
    initial_state_prior: optional `tfd.Distribution` instance specifying a
      prior on the initial state, corresponding to the values of the process
      at a set of size `order` of imagined timesteps before the initial
      step. If `None`, a heuristic default prior is constructed based on the
      provided `observed_time_series`.
      Default value: `None`.
    coefficient_constraining_bijector: optional `tfb.Bijector` instance
      representing a constraining mapping for the autoregressive
      coefficients. For example, `tfb.Tanh()` constrains the coefficients to
      lie in `(-1, 1)`, while `tfb.Softplus()` constrains them to be
      positive, and `tfb.Identity()` implies no constraint. If `None`, the
      default behavior constrains the coefficients to lie in `(-1, 1)` using
      a `Tanh` bijector.
      Default value: `None`.
    observed_time_series: optional `float` `Tensor` of shape
      `batch_shape + [T, 1]` (omitting the trailing unit dimension is also
      supported when `T > 1`), specifying an observed time series. Any
      priors not explicitly set will be given default values according to
      the scale of the observed time series (or batch of time series). May
      optionally be an instance of `tfp.sts.MaskedTimeSeries`, which
      includes a mask `Tensor` to specify timesteps with missing
      observations.
      Default value: `None`.
    name: the name of this model component.
      Default value: 'Autoregressive'.

  Raises:
    ValueError: if the event shape of `coefficients_prior` is fully defined
      and its first dimension differs from `order`.
  """
  with tf1.name_scope(
      name, 'Autoregressive', values=[observed_time_series]) as name:
    has_observations = observed_time_series is not None

    masked_time_series = None
    if has_observations:
      masked_time_series = (
          sts_util.canonicalize_observed_time_series_with_mask(
              observed_time_series))

    series_values = None
    if masked_time_series is not None:
      series_values = masked_time_series.time_series
    dtype = dtype_util.common_dtype(
        [series_values,
         coefficients_prior,
         level_scale_prior,
         initial_state_prior],
        dtype_hint=tf.float32)

    # Without data, fall back to unit scale and zero initial value.
    if has_observations:
      _, observed_stddev, observed_initial = sts_util.empirical_statistics(
          masked_time_series)
    else:
      observed_stddev = tf.convert_to_tensor(value=1., dtype=dtype)
      observed_initial = tf.convert_to_tensor(value=0., dtype=dtype)

    # Ones of shape `batch shape + [order]`.
    ones_shape = tf.concat(
        [tf.shape(input=observed_initial), [order]], axis=0)
    batch_ones = tf.ones(ones_shape, dtype=dtype)

    # Heuristic default priors. Overriding these may dramatically
    # change inference performance and results.
    if coefficients_prior is None:
      coefficients_prior = tfd.MultivariateNormalDiag(scale_diag=batch_ones)
    if level_scale_prior is None:
      level_scale_prior = tfd.LogNormal(
          loc=tf.math.log(0.05 * observed_stddev), scale=3.)

    if (coefficients_prior.event_shape.is_fully_defined() and
        order != coefficients_prior.event_shape[0]):
      raise ValueError("Prior dimension {} doesn't match order {}.".format(
          coefficients_prior.event_shape[0], order))

    if initial_state_prior is None:
      initial_spread = tf.abs(observed_initial) + observed_stddev
      initial_state_prior = tfd.MultivariateNormalDiag(
          loc=observed_initial[..., tf.newaxis] * batch_ones,
          scale_diag=initial_spread[..., tf.newaxis] * batch_ones)

    self._order = order
    self._coefficients_prior = coefficients_prior
    self._level_scale_prior = level_scale_prior
    self._initial_state_prior = initial_state_prior

    if coefficient_constraining_bijector is None:
      coefficient_constraining_bijector = tfb.Tanh()

    level_scale_bijector = tfb.Chain(
        [tfb.AffineScalar(scale=observed_stddev), tfb.Softplus()])
    model_parameters = [
        Parameter('coefficients',
                  coefficients_prior,
                  coefficient_constraining_bijector),
        Parameter('level_scale', level_scale_prior, level_scale_bijector),
    ]
    super(Autoregressive, self).__init__(
        parameters=model_parameters,
        latent_size=order,
        name=name)
def __init__(self,
             kernel_fn,
             x1,
             x2=None,
             kernel_args=None,
             num_matmul_parts=None,
             is_non_singular=None,
             is_self_adjoint=None,
             is_positive_definite=None,
             is_square=None,
             name=None):
  """Initializes the LinearOperator.

  The operator implicitly represents the covariance matrix between `x1` and
  `x2` (between `x1` and itself when `x2` is not given) under the kernel
  returned by `kernel_fn`. Each set of index points is assumed to carry one
  example dimension, which indexes the corresponding axis of the matrix; all
  outer dimensions are treated as batch dimensions.

  Typical uses that avoid materializing the full matrix:
    - accessing the diagonal (`diag_part` method)
    - accessing a [batch of] row[s] (`row` method)
    - accessing a [batch of] column[s] (`col` method)

  It can also perform matrix-vector products on very large covariance
  matrices by chunking: compute one part of the covariance matrix, multiply,
  then discard that part. Internally, recomputed gradients are used so that
  infeasibly-large intermediate activations are not retained.

  Args:
    kernel_fn: A Python callable which takes `*kernel_args` and returns an
      instance of `tfp.math.psd_kernels.PositiveSemidefiniteKernel`. As a
      convenience a kernel instance may be passed instead; in that case
      gradients to kernel hyperparameters are not available when using
      `num_matmul_parts`, and `kernel_args` must be `None`.
    x1: A floating point `Tensor`, the row index points.
    x2: Optional floating point `Tensor`, the column index points. If not
      provided, `x1` is used.
    kernel_args: A tuple of arguments (which may themselves be `tf.nest`
      compatible structures) passed to `kernel_fn`. These identify the
      tensors which receive gradients in a `num_matmul_parts`-chunked
      matmul/backprop.
    num_matmul_parts: Optional Python `int`, the number of partitions the
      matrix is broken into when applying this operator. (An extra remainder
      partition is implied for uneven partitioning.) Because the use-case is
      avoiding a memory blowup, the partitioned matmul uses
      `parallel_iterations=1` and `back_prop=False`.
    is_non_singular: Expect that this operator is non-singular.
    is_self_adjoint: Expect that this operator is equal to its hermitian
      transpose. If `dtype` is real, this is equivalent to being symmetric.
    is_positive_definite: Expect that this operator is positive definite,
      meaning the quadratic form `x^H A x` has positive real part for all
      nonzero `x`. Self-adjointness is not required. See:
      https://en.wikipedia.org/wiki/Positive-definite_matrix#Extension_for_non-symmetric_matrices
    is_square: Expect that this operator acts like square [batch] matrices.
    name: Optional name for related ops.

  Raises:
    ValueError: if a kernel instance is passed together with `kernel_args`,
      or if `x2` is omitted while any of the `is_*` hints is explicitly
      falsy.
  """
  if not callable(kernel_fn):
    # A kernel instance was supplied: it cannot take arguments, so wrap it in
    # a zero-argument factory to share the code path with real callables.
    if kernel_args is not None:
      raise ValueError(
          'Cannot pass a kernel instance for `kernel_fn` while '
          'also specifying `kernel_args`.')
    kernel_instance = kernel_fn
    kernel_fn = lambda: kernel_instance
  if kernel_args is None:
    kernel_args = ()

  # The kernel is instantiated here only so it can take part in dtype
  # inference alongside the index points.
  dtype = dtype_util.common_dtype(
      [kernel_fn(*kernel_args), x1, x2], dtype_hint=tf.float64)

  as_tensor = tensor_util.convert_nonref_to_tensor
  self._kernel_fn = kernel_fn
  self._kernel_args = tf.nest.map_structure(as_tensor, kernel_args)
  self._x1 = as_tensor(x1, dtype=dtype)
  self._x2 = as_tensor(x2, dtype=dtype)
  self._num_matmul_parts = as_tensor(num_matmul_parts, dtype=tf.int32)

  if self._x2 is None:
    # With a single set of index points every hint is forced to True; an
    # explicit falsy hint from the caller is therefore a contradiction.
    def _reject_falsy_hint(hint, message):
      if hint is not None and not hint:
        raise ValueError(message)

    _reject_falsy_hint(
        is_non_singular,
        'Operator is non-singular with a single set of index points.')
    _reject_falsy_hint(
        is_self_adjoint,
        'Operator is self-adjoint with a single set of index points.')
    _reject_falsy_hint(
        is_positive_definite,
        'Operator is positive-definite with a single set of index points.')
    _reject_falsy_hint(
        is_square,
        'Operator is square with a single set of index points.')
    is_non_singular = True
    is_self_adjoint = True
    is_positive_definite = True
    is_square = True

  super(LinearOperatorPSDKernel, self).__init__(
      dtype,
      is_non_singular=is_non_singular,
      is_self_adjoint=is_self_adjoint,
      is_positive_definite=is_positive_definite,
      is_square=is_square,
      name=name or 'LinearOperatorPSDKernel')
def __init__(self,
             loc,
             scale,
             low,
             high,
             validate_args=False,
             allow_nan_stats=True,
             name='TruncatedNormal'):
  """Construct TruncatedNormal.

  All parameters are broadcast against each other, so the resulting
  distribution has a `batch_shape` equal to the broadcast shape of all
  parameters.

  Args:
    loc: Floating point tensor; the mean of the underlying normal
      distribution(s). (The mean of the truncated distribution differs,
      since it is modified by the bounds.)
    scale: Floating point tensor; the standard deviation of the underlying
      normal distribution(s).
    low: `float` `Tensor`, lower bound of the distribution's support. Must be
      such that `low < high`.
    high: `float` `Tensor`, upper bound of the distribution's support. Must
      be such that `low < high`.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked at run-time.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value '`NaN`' to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.
  """
  # Must stay the first statement so that only constructor arguments are
  # captured.
  parameters = dict(locals())
  with tf.name_scope(name) as name:
    dtype = dtype_util.common_dtype([loc, scale, low, high], tf.float32)
    as_tensor = tensor_util.convert_nonref_to_tensor
    self._loc = as_tensor(loc, dtype=dtype, name='loc')
    self._scale = as_tensor(scale, dtype=dtype, name='scale')
    self._low = as_tensor(low, dtype=dtype, name='low')
    self._high = as_tensor(high, dtype=dtype, name='high')
    dtype_util.assert_same_float_dtype(
        [self._loc, self._scale, self._low, self._high])

    # This distribution is fully reparameterized. `loc` and `scale` have
    # straight-through gradients. The gradients for the bounds are
    # implemented using custom derived expressions based on implicit
    # gradients. For the special case of lower bound zero and a positive
    # upper bound an equivalent expression can also be found in Sec 9.1.1.
    # of https://arxiv.org/pdf/1806.01851.pdf. The implementation here
    # handles arbitrary bounds.
    reparam_type = reparameterization.FULLY_REPARAMETERIZED
    super(TruncatedNormal, self).__init__(
        dtype=dtype,
        reparameterization_type=reparam_type,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        parameters=parameters,
        name=name)
def _random_gamma_rejection(
    shape, concentration, rate=None, log_rate=None, seed=None,
    log_space=False, internal_dtype=None):
  """Samples from the gamma distribution.

  The sampling algorithm is rejection sampling [1], and pathwise gradients
  with respect to concentration are computed via implicit differentiation [2].

  Args:
    shape: The output sample shape. Trailing dims must match broadcast of
      `concentration` with `rate` or `log_rate`.
    concentration: Floating point tensor, the concentration params of the
      distribution(s). Must contain only positive values. Must broadcast with
      `rate` or `log_rate`, if given. Entries for which `concentration >= 0`
      does not hold (negative or NaN) produce NaN samples.
    rate: Floating point tensor, the inverse scale params of the
      distribution(s). Must contain only positive values. Must broadcast with
      `concentration`. If `None`, handled as if 1 (but possibly more
      efficiently). Mutually exclusive with `log_rate`.
    log_rate: Floating point tensor, log of the inverse scale params of the
      distribution(s). Must broadcast with `concentration`. If `None`, handled
      as if 0 (but possibly more efficiently). Mutually exclusive with `rate`.
    seed: PRNG seed; see `tfp.random.sanitize_seed` for details.
    log_space: Optionally sample log(gamma) variates.
    internal_dtype: dtype to use for internal computations. If unspecified, we
      use the same dtype as the output (i.e. the dtype of `concentration`,
      `rate`, or `log_rate`) when `log_space == True`, and `tf.float64`
      otherwise.

  Returns:
    Differentiable samples from the gamma distribution.

  Raises:
    ValueError: if both `rate` and `log_rate` are specified.

  #### References

  [1] George Marsaglia and Wai Wan Tsang. A simple method for generating Gamma
      variables. ACM Transactions on Mathematical Software, 2000.

  [2] Michael Figurnov, Shakir Mohamed, and Andriy Mnih. Implicit
      Reparameterization Gradients. Neural Information Processing Systems,
      2018.
  """
  # Validate arguments up front, so that a bad call fails before any seeds are
  # split or sampling ops are built.
  if rate is not None and log_rate is not None:
    raise ValueError('`rate` and `log_rate` are mutually exclusive.')

  generate_and_test_samples_seed, concentration_fix_seed = samplers.split_seed(
      seed, salt='random_gamma')
  output_dtype = dtype_util.common_dtype([concentration, rate, log_rate],
                                         dtype_hint=tf.float32)
  if internal_dtype is None:
    internal_dtype = output_dtype if log_space else tf.float64

  def rejection_sample(concentration):
    """Gamma rejection sampler."""
    # Note, concentration here already has a shape that is broadcast with rate.
    cast_concentration = tf.cast(concentration, internal_dtype)

    good_params_mask = (concentration >= 0.)
    # When replacing NaN values, use 100. for concentration, since that leads to
    # a high-likelihood of the rejection sampler accepting on the first pass.
    safe_concentration = tf.where(good_params_mask, cast_concentration, 100.)
    # Concentrations below 1 are sampled at `concentration + 1` and corrected
    # afterwards (see `concentration_lt_one_fix` below).
    modified_safe_concentration = tf.where(
        safe_concentration < 1., safe_concentration + 1., safe_concentration)

    one_third = tf.constant(1. / 3, dtype=internal_dtype)
    d = modified_safe_concentration - one_third
    c = one_third * tf.math.rsqrt(d)

    def generate_and_test_samples(seed):
      """Generate and test samples."""
      v_seed, u_seed = samplers.split_seed(seed)

      x = samplers.normal(shape, dtype=internal_dtype, seed=v_seed)
      # This implicitly broadcasts concentration up to sample shape.
      v = 1 + c * x
      # In [1], there is an 'inner' rejection sampling loop which checks that
      # v > 0 and generates a new normal sample if it's not, saving the rest of
      # the computations below. We found that merging the check for v > 0 with
      # the `good_sample_mask` not only simplifies the code, but leads to a
      # ~2x speedup for small concentrations on GPU, at the cost of deviating
      # slightly from the implementation given in Ref. [1].
      accept_v = v > 0.
      logv = tf.math.log1p(c * x)
      x2 = x * x
      v3 = v * v * v
      logv3 = logv * 3

      u = samplers.uniform(shape, dtype=internal_dtype, seed=u_seed)

      # In [1], the suggestion is to first check u < 1 - 0.331 * x2 * x2, and to
      # run the check below only if it fails, in order to avoid the relatively
      # expensive logarithm calls. Our algorithm operates in batch mode: we will
      # have to compute or not compute the logarithms for the entire batch, and
      # as the batch gets larger, the odds we compute it grow. Therefore we
      # don't bother with the "cheap" check.
      good_sample_mask = tf.logical_and(
          tf.math.log(u) < (x2 / 2. + d * (1 - v3 + logv3)), accept_v)

      return logv3 if log_space else v3, good_sample_mask

    samples = brs.batched_las_vegas_algorithm(
        generate_and_test_samples, seed=generate_and_test_samples_seed)[0]

    concentration_fix_unif = samplers.uniform(  # in [0, 1)
        shape, dtype=internal_dtype, seed=concentration_fix_seed)

    if log_space:
      concentration_lt_one_fix = tf.where(
          safe_concentration < 1.,
          # Why do we use log1p(-x)? x is in [0, 1) and log(0) = -inf, is bad.
          # x ~ U(0,1) => 1-x ~ U(0,1)
          # But at the boundary, 1-x in (0, 1]. Good.
          # So we can take log(unif(0,1)) safely as log(1-unif(0,1)).
          # log1p(-0) = 0, and log1p(-almost_one) = -not_quite_inf. Good.
          tf.math.log1p(-concentration_fix_unif) / safe_concentration,
          tf.zeros((), dtype=internal_dtype))
      samples = samples + tf.math.log(d) + concentration_lt_one_fix
    else:
      concentration_lt_one_fix = tf.where(
          safe_concentration < 1.,
          tf.math.pow(concentration_fix_unif,
                      tf.math.reciprocal(safe_concentration)),
          tf.ones((), dtype=internal_dtype))
      samples = samples * d * concentration_lt_one_fix

    # Entries with invalid concentration were sampled with a placeholder
    # value; overwrite them with NaN.
    samples = tf.where(good_params_mask, samples, np.nan)
    output_type_samples = tf.cast(samples, output_dtype)

    return output_type_samples

  broadcast_conc_shape = ps.broadcast_shape(ps.shape(concentration),
                                            _shape_or_scalar(rate, log_rate))
  broadcast_concentration = tf.broadcast_to(concentration,
                                            broadcast_conc_shape)
  concentration_samples = rejection_sample(broadcast_concentration)

  if rate is None and log_rate is None:
    # Unit rate: nothing to rescale.
    if not log_space:
      concentration_samples = _fix_zero_samples(concentration_samples)
    return concentration_samples

  if log_space:
    if log_rate is None:
      # A rate failing `rate >= 0` is mapped to NaN before the log.
      log_rate = tf.math.log(tf.where(rate >= 0., rate, np.nan))
    return concentration_samples - log_rate
  else:
    if rate is None:
      rate = tf.math.exp(log_rate)
    corrected_rate = tf.where(rate >= 0., rate, np.nan)
    # 0 rate is infinite scale, which implies a +inf sample.
    ret = tf.where(
        tf.equal(corrected_rate, 0),
        tf.constant(np.inf, dtype=output_dtype),
        _fix_zero_samples(concentration_samples / corrected_rate))
    return ret
def __init__(self,
             concentration,
             rate=None,
             log_rate=None,
             validate_args=False,
             allow_nan_stats=True,
             force_probs_to_zero_outside_support=False,
             name='Gamma'):
  """Construct Gamma with `concentration` and `rate` parameters.

  The parameters `concentration` and `rate` must be shaped in a way that
  supports broadcasting (e.g. `concentration + rate` is a valid operation).

  Args:
    concentration: Floating point tensor, the concentration params of the
      distribution(s). Must contain only positive values.
    rate: Floating point tensor, the inverse scale params of the
      distribution(s). Must contain only positive values. Mutually exclusive
      with `log_rate`.
    log_rate: Floating point tensor, natural logarithm of the inverse scale
      params of the distribution(s). Mutually exclusive with `rate`.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    force_probs_to_zero_outside_support: If `True`, force `prob(x) == 0` and
      `log_prob(x) == -inf` for values of x outside the distribution support.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    ValueError: if neither or both of `rate` and `log_rate` are specified.
    TypeError: if `concentration` and `rate` are different dtypes.
  """
  # Must stay the first statement so that only constructor arguments are
  # captured.
  parameters = dict(locals())
  self._force_probs_to_zero_outside_support = (
      force_probs_to_zero_outside_support)

  # Exactly one of the two rate parameterizations has to be supplied.
  has_rate = rate is not None
  has_log_rate = log_rate is not None
  if has_rate == has_log_rate:
    raise ValueError(
        'Exactly one of `rate` or `log_rate` must be specified.')

  with tf.name_scope(name) as name:
    dtype = dtype_util.common_dtype(
        [concentration, rate, log_rate], dtype_hint=tf.float32)
    as_tensor = tensor_util.convert_nonref_to_tensor
    self._concentration = as_tensor(
        concentration, dtype=dtype, name='concentration')
    self._rate = as_tensor(rate, dtype=dtype, name='rate')
    self._log_rate = as_tensor(log_rate, dtype=dtype, name='log_rate')

    super(Gamma, self).__init__(
        dtype=dtype,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
        parameters=parameters,
        name=name)
def __init__(self,
             dimension,
             concentration,
             input_output_cholesky=False,
             validate_args=False,
             allow_nan_stats=True,
             name='LKJ'):
  """Construct LKJ distributions.

  Args:
    dimension: Python `int`. The dimension of the correlation matrices
      to sample.
    concentration: `float` or `double` `Tensor`. The positive concentration
      parameter of the LKJ distributions. The pdf of a sample matrix `X` is
      proportional to `det(X) ** (concentration - 1)`. Note that when
      `validate_args` is `True` the check applied here is
      `concentration >= 1`.
    input_output_cholesky: Python `bool`. If `True`, functions whose input or
      output have the semantics of samples assume inputs are in Cholesky form
      and return outputs in Cholesky form. In particular, if this flag is
      `True`, input to `log_prob` is presumed of Cholesky form and output from
      `sample` is of Cholesky form. Setting this argument to `True` is purely
      a computational optimization and does not change the underlying
      distribution. Additionally, validation checks which are only defined on
      the multiplied-out form are omitted, even if `validate_args` is `True`.
      Default value: `False` (i.e., input/output does not have Cholesky
      semantics).
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value `NaN` to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    ValueError: If `dimension` is negative.
  """
  if dimension < 0:
    raise ValueError(
        'There are no negative-dimension correlation matrices.')
  # Captured before any further local is introduced, so only constructor
  # arguments are recorded.
  parameters = dict(locals())
  self._input_output_cholesky = input_output_cholesky

  with tf.name_scope(name):
    concentration_dtype = dtype_util.common_dtype(
        [concentration], dtype_hint=tf.float32)
    concentration = tf.convert_to_tensor(
        concentration, name='concentration', dtype=concentration_dtype)

    assertions = []
    if validate_args:
      # concentration >= 1
      # TODO(b/111451422) Generalize to concentration > 0.
      assertions.append(assert_util.assert_non_negative(concentration - 1.))

    with tf.control_dependencies(assertions):
      self._dimension = dimension
      self._concentration = tf.identity(concentration, name='concentration')

  super(LKJ, self).__init__(
      dtype=self._concentration.dtype,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
      parameters=parameters,
      graph_parents=[self._concentration],
      name=name)
def histogram(x,
              edges,
              axis=None,
              extend_lower_interval=False,
              extend_upper_interval=False,
              dtype=None,
              name=None):
  """Count how often `x` falls in intervals defined by `edges`.

  Given `edges = [c0, ..., cK]`, defining intervals
  `I0 = [c0, c1)`, `I1 = [c1, c2)`, ..., `I_{K-1} = [c_{K-1}, cK]`,
  this function counts how often `x` falls into each interval.

  Values of `x` outside of the intervals cause errors. Consider using
  `extend_lower_interval`, `extend_upper_interval` to deal with this.

  Args:
    x: Numeric `N-D` `Tensor` with `N > 0`. If `axis` is not `None`, must
      have statically known number of dimensions. The `axis` kwarg determines
      which dimensions index iid samples. Other dimensions of `x` index
      "events" for which we will compute different histograms.
    edges: `Tensor` of same `dtype` as `x`. The first dimension indexes edges
      of intervals. Must either be `1-D` or have `edges.shape[1:]` the same
      as the dimensions of `x` excluding `axis`. If `rank(edges) > 1`,
      `edges[k]` designates a shape `edges.shape[1:]` `Tensor` of interval
      edges for the corresponding dimensions of `x`.
    axis: Optional `0-D` or `1-D` integer `Tensor` with constant values. The
      axis in `x` that index iid samples. Must not be empty.
      `Default value:` `None` (treat every dimension as sample dimension).
    extend_lower_interval: Python `bool`. If `True`, extend the lowest
      interval `I0` to `(-inf, c1)`.
    extend_upper_interval: Python `bool`. If `True`, extend the upper
      interval `I_{K-1}` to `[c_{K-1}, +inf)`.
    dtype: The output type.
      `Default value:` the common dtype of `x` and `edges`.
    name: A Python string name to prepend to created ops.
      `Default value:` 'histogram'

  Returns:
    counts: `Tensor` of type `dtype` and, with
      `~axis = [i for i in range(x.ndim) if i not in axis]`,
      `counts.shape = [edges.shape[0] - 1] + x.shape[~axis]`.
      With `I` a multi-index into `~axis`, `counts[k][I]` is the number of
      times event(s) fell into the `kth` interval of `edges`.

  Raises:
    ValueError: if `axis` is given but empty.

  #### Examples

  ```python
  # x.shape = [1000, 2]
  # x[:, 0] ~ Uniform(0, 1), x[:, 1] ~ Uniform(1, 2).
  x = tf.stack([tf.random.uniform([1000]), 1 + tf.random.uniform([1000])],
               axis=-1)

  # edges ==> bins [0, 0.5), [0.5, 1.0), [1.0, 1.5), [1.5, 2.0].
  edges = [0., 0.5, 1.0, 1.5, 2.0]

  tfp.stats.histogram(x, edges)
  ==> approximately [500, 500, 500, 500]

  tfp.stats.histogram(x, edges, axis=0)
  ==> approximately [[500, 0], [500, 0], [0, 500], [0, 500]]
  ```
  """
  with tf.compat.v1.name_scope(name, 'histogram', values=[x, edges, axis]):
    # Tensor conversions.
    in_dtype = dtype_util.common_dtype([x, edges], dtype_hint=tf.float32)
    x = tf.convert_to_tensor(value=x, name='x', dtype=in_dtype)
    edges = tf.convert_to_tensor(value=edges, name='edges', dtype=in_dtype)

    # Move dims in axis to the left end as one flattened dim.
    # After this, x.shape = [n_samples] + E.
    if axis is not None:
      x_ndims = _get_static_ndims(
          x, expect_static=True, expect_ndims_at_least=1)
      axis = _make_static_axis_non_negative_list(axis, x_ndims)
      if not axis:
        raise ValueError(
            '`axis` cannot be empty. Found: {}'.format(axis))
      x = _move_dims_to_flat_end(x, axis, x_ndims, right_end=False)
    else:
      # Every dimension is a sample dimension: flatten completely.
      x = tf.reshape(x, shape=[-1])

    # bins.shape = x.shape = [n_samples] + E,
    # and bins[i] is a shape E Tensor of the bins that sample `i` fell into.
    # E is the "event shape", which is [] if axis is None.
    # If not extending intervals, then values outside the edges will return
    # -1, which gives an error when fed to bincount.
    bins = find_bins(
        x,
        edges=edges,
        extend_lower_interval=extend_lower_interval,
        extend_upper_interval=extend_upper_interval,
        dtype=tf.int32)

    # K + 1 edges delimit K bins. Pinning both min and max length ensures we
    # get correct output, even if x did not fall into every bin.
    n_bins = tf.shape(input=edges)[0] - 1

    # TODO(b/124015136) Use standard tf.math.bincount once it supports `axis`.
    counts = count_integers(
        bins,
        minlength=n_bins,
        maxlength=n_bins,
        axis=0,
        dtype=dtype or in_dtype)

    # Propagate the static bin count to the result when it is known.
    n_edges = tf.compat.dimension_value(edges.shape[0])
    if n_edges is not None:
      static_shape = tf.TensorShape([n_edges - 1]).concatenate(
          counts.shape[1:])
      counts.set_shape(static_shape)
    return counts
def testNoModifyArgsList(self):
  """`common_dtype` must leave the list passed to it untouched."""
  ones = tf.ones(3, tf.float32)
  zeros = tf.zeros(4, tf.float32)
  args = [ones, zeros]
  inferred = dtype_util.common_dtype(args)
  self.assertEqual(tf.float32, inferred)
  # The argument list still holds exactly the two original entries.
  self.assertLen(args, 2)
def __init__(self,
             num_timesteps,
             period,
             frequency_multipliers,
             drift_scale,
             initial_state_prior,
             observation_noise_scale=0.,
             initial_step=0,
             validate_args=False,
             allow_nan_stats=True,
             name=None):
  """Build a smooth seasonal state space model.

  Args:
    num_timesteps: Scalar `int` `Tensor` number of timesteps to model
      with this distribution.
    period: positive scalar `float` `Tensor` giving the number of timesteps
      required for the longest cyclic effect to repeat.
    frequency_multipliers: One-dimensional `float` `Tensor` listing the
      frequencies (cyclic components) included in the model, as multipliers
      of the base/fundamental frequency `2. * pi / period`. Each component is
      specified by the number of times it repeats per period, and adds two
      latent dimensions to the model. A smooth seasonal model that can
      represent any periodic function is given by
      `frequency_multipliers = [1, 2, ..., floor(period / 2)]`. However, it is
      often desirable to enforce a smoothness assumption (and reduce the
      computational burden) by dropping some of the higher frequencies.
    drift_scale: Scalar (any additional dimensions are treated as batch
      dimensions) `float` `Tensor` indicating the standard deviation of the
      latent state transitions.
    initial_state_prior: instance of `tfd.MultivariateNormal`
      representing the prior distribution on latent states. Must have
      event shape `[num_features]`.
    observation_noise_scale: Scalar (any additional dimensions are
      treated as batch dimensions) `float` `Tensor` indicating the standard
      deviation of the observation noise. It is converted to the dtype
      inferred from the other parameters.
      Default value: `0.`.
    initial_step: scalar `int` `Tensor` specifying the starting timestep.
      Default value: `0`.
    validate_args: Python `bool`. Whether to validate input with asserts. If
      `validate_args` is `False`, and the inputs are invalid, correct behavior
      is not guaranteed.
      Default value: `False`.
    allow_nan_stats: Python `bool`. If `False`, raise an exception if a
      statistic (e.g. mean/mode/etc...) is undefined for any batch member. If
      `True`, batch members with valid parameters leading to undefined
      statistics will return NaN for this statistic.
      Default value: `True`.
    name: Python `str` name prefixed to ops created by this class.
      Default value: 'SmoothSeasonalStateSpaceModel'.
  """
  # Must stay the first statement so that only constructor arguments are
  # captured.
  parameters = dict(locals())
  with tf.name_scope(name or 'SmoothSeasonalStateSpaceModel') as name:
    dtype = dtype_util.common_dtype(
        [period, frequency_multipliers, drift_scale, initial_state_prior])

    period = tf.convert_to_tensor(value=period, dtype=dtype, name='period')
    frequency_multipliers = tf.convert_to_tensor(
        value=frequency_multipliers,
        dtype=dtype,
        name='frequency_multipliers')
    drift_scale = tf.convert_to_tensor(
        value=drift_scale, dtype=dtype, name='drift_scale')
    observation_noise_scale = tf.convert_to_tensor(
        value=observation_noise_scale,
        dtype=dtype,
        name='observation_noise_scale')

    num_frequencies = static_num_frequencies(frequency_multipliers)

    # Each frequency contributes two latent dimensions; the `[1., 0.]`
    # pattern is repeated once per frequency.
    observation_matrix = tf.tile(
        tf.constant([[1., 0.]], dtype=dtype),
        multiples=[1, num_frequencies])

    transition_matrix = build_smooth_seasonal_transition_matrix(
        period=period,
        frequency_multipliers=frequency_multipliers,
        dtype=dtype)

    self._drift_scale = drift_scale
    self._observation_noise_scale = observation_noise_scale
    self._period = period
    self._frequency_multipliers = frequency_multipliers

    # The same drift scale is applied to every latent dimension.
    latent_ones = tf.ones([2 * num_frequencies], dtype=dtype)
    transition_noise = tfd.MultivariateNormalDiag(
        scale_diag=drift_scale[..., tf.newaxis] * latent_ones,
        name='transition_noise')
    observation_noise = tfd.MultivariateNormalDiag(
        scale_diag=observation_noise_scale[..., tf.newaxis],
        name='observation_noise')

    super(SmoothSeasonalStateSpaceModel, self).__init__(
        num_timesteps=num_timesteps,
        transition_matrix=transition_matrix,
        transition_noise=transition_noise,
        observation_matrix=observation_matrix,
        observation_noise=observation_noise,
        initial_state_prior=initial_state_prior,
        initial_step=initial_step,
        allow_nan_stats=allow_nan_stats,
        validate_args=validate_args,
        name=name)
    # Record this class's own constructor arguments after the base
    # initializer has run.
    self._parameters = parameters
def testCommonDtypeFromLinop(self):
  """A linear operator's dtype takes precedence over `dtype_hint`."""
  diag_operator = tf.linalg.LinearOperatorDiag(tf.ones(3, tf.float16))
  inferred = dtype_util.common_dtype([diag_operator], dtype_hint=tf.float32)
  self.assertEqual(tf.float16, inferred)
def __init__(self,
             level_scale_prior=None,
             initial_level_prior=None,
             observed_time_series=None,
             name=None):
  """Specify a local level model.

  Args:
    level_scale_prior: optional `tfd.Distribution` instance specifying a prior
      on the `level_scale` parameter. If `None`, a heuristic default prior is
      constructed based on the provided `observed_time_series`.
      Default value: `None`.
    initial_level_prior: optional `tfd.Distribution` instance specifying a
      prior on the initial level. If `None`, a heuristic default prior is
      constructed based on the provided `observed_time_series`.
      Default value: `None`.
    observed_time_series: optional `float` `Tensor` of shape
      `batch_shape + [T, 1]` (omitting the trailing unit dimension is also
      supported when `T > 1`), specifying an observed time series. Any priors
      not explicitly set will be given default values according to the scale
      of the observed time series (or batch of time series). Its empirical
      standard deviation is also used to scale the `level_scale` parameter's
      constraining bijector. May optionally be an instance of
      `tfp.sts.MaskedTimeSeries`, which includes a mask `Tensor` to specify
      timesteps with missing observations.
      Default value: `None`.
    name: the name of this model component.
      Default value: 'LocalLevel'.
  """
  with tf.name_scope(name or 'LocalLevel') as name:
    dtype = dtype_util.common_dtype(
        [level_scale_prior, initial_level_prior])

    # Always compute the empirical statistics: `observed_stddev` is needed by
    # the `level_scale` bijector below even when both priors are supplied.
    # (Computing it only when a prior was missing left it unbound, and the
    # constructor failed, whenever both priors were passed explicitly.)
    if observed_time_series is not None:
      _, observed_stddev, observed_initial = (
          sts_util.empirical_statistics(observed_time_series))
    else:
      # Without data, fall back to unit scale and zero initial level.
      observed_stddev = tf.convert_to_tensor(value=1., dtype=dtype)
      observed_initial = tf.convert_to_tensor(value=0., dtype=dtype)

    # Heuristic default priors. Overriding these may dramatically
    # change inference performance and results.
    if level_scale_prior is None:
      level_scale_prior = tfd.LogNormal(
          loc=tf.math.log(.05 * observed_stddev),
          scale=3.,
          name='level_scale_prior')

    if initial_level_prior is None:
      self._initial_state_prior = tfd.MultivariateNormalDiag(
          loc=observed_initial[..., tf.newaxis],
          scale_diag=(tf.abs(observed_initial) +
                      observed_stddev)[..., tf.newaxis],
          name='initial_level_prior')
    else:
      # Wrap the user-supplied prior as a one-dimensional multivariate
      # normal matching its mean and standard deviation.
      self._initial_state_prior = tfd.MultivariateNormalDiag(
          loc=initial_level_prior.mean()[..., tf.newaxis],
          scale_diag=initial_level_prior.stddev()[..., tf.newaxis])

    super(LocalLevel, self).__init__(
        parameters=[
            Parameter(
                'level_scale',
                level_scale_prior,
                tfb.Chain([
                    tfb.AffineScalar(scale=observed_stddev),
                    tfb.Softplus()
                ])),
        ],
        latent_size=1,
        name=name)
def __init__(self,
             rate=None,
             log_rate=None,
             interpolate_nondiscrete=True,
             dtype=tf.int32,
             validate_args=False,
             allow_nan_stats=True,
             name='Poisson'):
  """Initialize a batch of Poisson distributions.

  Args:
    rate: Floating point tensor, the rate parameter. `rate` must be positive.
      Must specify exactly one of `rate` and `log_rate`.
    log_rate: Floating point tensor, the log of the rate parameter.
      Must specify exactly one of `rate` and `log_rate`.
    interpolate_nondiscrete: Python `bool`. When `False`,
      `log_prob` returns `-inf` (and `prob` returns `0`) for non-integer
      inputs. When `True`, `log_prob` evaluates the continuous function
      `k * log_rate - lgamma(k+1) - rate`, which matches the Poisson pmf
      at integer arguments `k` (note that this function is not itself
      a normalized probability log-density).
      Default value: `True`.
    dtype: The type of the event samples. Default: `int32`.
      NOTE(review): this constructor only records the argument in
      `parameters`; the dtype handed to the base class is the one inferred
      from `rate` / `log_rate`. Confirm whether that is intended.
    validate_args: Python `bool`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
      Default value: `False`.
    allow_nan_stats: Python `bool`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
      Default value: `True`.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    ValueError: if none or both of `rate`, `log_rate` are specified.
    TypeError: if `rate` is not a float-type.
    TypeError: if `log_rate` is not a float-type.
  """
  # Must stay the first statement so that only constructor arguments are
  # captured.
  parameters = dict(locals())

  has_rate = rate is not None
  has_log_rate = log_rate is not None
  if has_rate == has_log_rate:
    raise ValueError(
        'Must specify exactly one of `rate` and `log_rate`.')

  with tf.name_scope(name) as name:
    # dtype of the rate parameters; kept under its own name so it is not
    # confused with the `dtype` argument.
    param_dtype = dtype_util.common_dtype(
        [rate, log_rate], dtype_hint=tf.float32)
    if not dtype_util.is_floating(param_dtype):
      raise TypeError(
          '[log_]rate.dtype ({}) is a not a float-type.'.format(
              dtype_util.name(param_dtype)))

    as_tensor = tensor_util.convert_nonref_to_tensor
    self._rate = as_tensor(rate, name='rate', dtype=param_dtype)
    self._log_rate = as_tensor(log_rate, name='log_rate', dtype=param_dtype)
    self._interpolate_nondiscrete = interpolate_nondiscrete

    super(Poisson, self).__init__(
        dtype=param_dtype,
        reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        parameters=parameters,
        name=name)
def __init__(self,
             kernel,
             index_points=None,
             mean_fn=None,
             observation_noise_variance=0.,
             jitter=1e-6,
             validate_args=False,
             allow_nan_stats=False,
             name='GaussianProcess'):
  """Instantiate a GaussianProcess Distribution.

  Args:
    kernel: `PositiveSemidefiniteKernel`-like instance representing the
      GP's covariance function.
    index_points: `float` `Tensor` representing finite (batch of) vector(s) of
      points in the index set over which the GP is defined. Shape has the form
      `[b1, ..., bB, e, f1, ..., fF]` where `F` is the number of feature
      dimensions and must equal `kernel.feature_ndims` and `e` is the number
      (size) of index points in each batch. Ultimately this distribution
      corresponds to a `e`-dimensional multivariate normal. The batch shape
      must be broadcastable with `kernel.batch_shape` and any batch dims
      yielded by `mean_fn`.
    mean_fn: Python `callable` that acts on `index_points` to produce a (batch
      of) vector(s) of mean values at `index_points`. Takes a `Tensor` of
      shape `[b1, ..., bB, f1, ..., fF]` and returns a `Tensor` whose shape is
      broadcastable with `[b1, ..., bB]`.
      Default value: `None` implies constant zero function.
    observation_noise_variance: `float` `Tensor` representing the variance
      of the noise in the Normal likelihood distribution of the model. May be
      batched, in which case the batch shape must be broadcastable with the
      shapes of all other batched parameters (`kernel.batch_shape`,
      `index_points`, etc.).
      Default value: `0.`
    jitter: `float` scalar `Tensor` added to the diagonal of the covariance
      matrix to ensure positive definiteness of the covariance matrix.
      Default value: `1e-6`.
    validate_args: Python `bool`. When `True` distribution parameters are
      checked for validity despite possibly degrading runtime performance.
      When `False` invalid inputs may silently render incorrect outputs.
      Default value: `False`.
    allow_nan_stats: Python `bool`. When `True`, statistics (e.g., mean,
      mode, variance) use the value "`NaN`" to indicate the result is
      undefined. When `False`, an exception is raised if one or more of the
      statistic's batch members are undefined.
      Default value: `False`.
    name: Python `str` name prefixed to Ops created by this class.
      Default value: "GaussianProcess".

  Raises:
    ValueError: if `mean_fn` is not `None` and is not callable.
  """
  parameters = dict(locals())
  with tf.name_scope(name) as name:
    dtype = dtype_util.common_dtype(
        [index_points, observation_noise_variance, jitter], tf.float32)
    has_index_points = index_points is not None
    if has_index_points:
      index_points = tf.convert_to_tensor(
          value=index_points, dtype=dtype, name='index_points')
    jitter = tf.convert_to_tensor(value=jitter, dtype=dtype, name='jitter')
    observation_noise_variance = tf.convert_to_tensor(
        value=observation_noise_variance,
        dtype=dtype,
        name='observation_noise_variance')

    self._kernel = kernel
    self._index_points = index_points

    if mean_fn is None:
      # No mean supplied: fall back to a constant-zero mean in the common
      # dtype computed above.
      mean_fn = lambda x: tf.zeros([1], dtype=dtype)
    elif not callable(mean_fn):
      raise ValueError('`mean_fn` must be a Python callable')
    self._mean_fn = mean_fn
    self._observation_noise_variance = observation_noise_variance
    self._jitter = jitter

    # `index_points` is only a graph parent when it was actually provided.
    graph_parents = [observation_noise_variance, jitter] + (
        [index_points] if has_index_points else [])

    with tf.name_scope('init'):
      super(GaussianProcess, self).__init__(
          dtype=dtype,
          reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
          validate_args=validate_args,
          allow_nan_stats=allow_nan_stats,
          parameters=parameters,
          graph_parents=graph_parents,
          name=name)
def one_step_predictive(model,
                        posterior_samples,
                        num_forecast_steps=0,
                        original_mean=0.,
                        original_scale=1.,
                        thin_every=10):
  """Constructs a one-step-ahead predictive distribution at every timestep.

  Unlike the generic `tfp.sts.one_step_predictive`, this method uses the
  latent levels from Gibbs sampling to efficiently construct a predictive
  distribution that mixes over posterior samples. The predictive distribution
  may also include additional forecast steps.

  This method returns the predictive distributions for each timestep given
  previous timesteps and sampled model parameters,
  `p(observed_time_series[t] | observed_time_series[:t], weights,
  observation_noise_scale)`. Note that the posterior values of the weights and
  noise scale will in general be informed by observations from all timesteps
  *including the step being predicted*, so this is not a strictly kosher
  probabilistic quantity, but in general we assume that it's close, i.e., that
  the step being predicted had very small individual impact on the overall
  parameter posterior.

  Args:
    model: A `tfd.sts.StructuralTimeSeries` model instance. This must be of the
      form constructed by `build_model_for_gibbs_sampling`.
    posterior_samples: A `GibbsSamplerState` instance in which each element is
      a `Tensor` with initial dimension of size `num_samples`.
    num_forecast_steps: Python `int` number of additional forecast steps to
      append.
      Default value: `0`.
    original_mean: Optional scalar float `Tensor`, added to the predictive
      distribution to undo the effect of input normalization.
      Default value: `0.`
    original_scale: Optional scalar float `Tensor`, used to rescale the
      predictive distribution to undo the effect of input normalization.
      Default value: `1.`
    thin_every: Optional Python `int` factor by which to thin the posterior
      samples, to reduce complexity of the predictive distribution. For
      example, if `thin_every=10`, every `10`th sample will be used.
      Default value: `10`.

  Returns:
    predictive_dist: A `tfd.MixtureSameFamily` instance of event shape
      `[num_timesteps + num_forecast_steps]` representing the predictive
      distribution of each timestep given previous timesteps.
  """
  dtype = dtype_util.common_dtype([
      posterior_samples.level_scale.dtype,
      posterior_samples.observation_noise_scale.dtype,
      posterior_samples.level.dtype,
      original_mean,
      original_scale], dtype_hint=tf.float32)
  num_observed_steps = prefer_static.shape(posterior_samples.level)[-1]

  original_mean = tf.convert_to_tensor(original_mean, dtype=dtype)
  original_scale = tf.convert_to_tensor(original_scale, dtype=dtype)

  # Keep every `thin_every`-th draw of each field of the sampler state.
  draws = tf.nest.map_structure(lambda x: x[::thin_every], posterior_samples)

  # If no slope was inferred, treat as zero.
  slope_rank = prefer_static.rank_from_shape(prefer_static.shape(draws.slope))
  if slope_rank <= 1:
    draws = draws._replace(
        slope=tf.zeros_like(draws.level),
        slope_scale=tf.zeros_like(draws.level_scale))

  # One step for every observed timestep, then 1, 2, ..., num_forecast_steps
  # for the appended forecast steps.
  steps_ahead = tf.concat([
      tf.ones([num_observed_steps], dtype=dtype),
      tf.range(1, num_forecast_steps + 1, dtype=dtype)], axis=0)

  # Under the local linear trend model the predicted level at an observed
  # step is the previous step's level plus the previous step's slope;
  # transition noise of scale `level_scale` is accounted for below.
  forecast_pieces = []
  if num_forecast_steps > 0:
    num_batch_dims = prefer_static.rank_from_shape(
        prefer_static.shape(draws.level)) - 2
    tile_multiples = tf.concat(
        [tf.ones([num_batch_dims + 1], dtype=tf.int32),
         [num_forecast_steps]], axis=0)
    # All else equal, the current level will remain stationary.
    forecast_level = tf.tile(draws.level[..., -1:], tile_multiples)
    # If the model includes slope, the level will steadily increase.
    forecast_level += (
        draws.slope[..., -1:] *
        tf.range(1., num_forecast_steps + 1., dtype=forecast_level.dtype))
    forecast_pieces = [forecast_level]

  observed_pieces = [
      draws.level[..., :1],  # t == 0
      draws.level[..., :-1] + draws.slope[..., :-1],  # 1 <= t < T
  ]
  level_pred = tf.concat(observed_pieces + forecast_pieces, axis=-1)

  total_steps = num_observed_steps + num_forecast_steps
  design_matrix = _get_design_matrix(model).to_dense()[:total_steps]
  regression_effect = tf.linalg.matvec(design_matrix, draws.weights)

  y_mean = ((level_pred + regression_effect) *
            original_scale[..., tf.newaxis] + original_mean[..., tf.newaxis])

  # To derive a forecast variance, including slope uncertainty, let
  # `r[:k]` be iid Gaussian RVs with variance `level_scale**2` and `s[:k]` be
  # iid Gaussian RVs with variance `slope_scale**2`. Then the forecast level at
  # step `T + k` can be written as
  #   (level[T] +           # Last known level.
  #    r[0] + ... + r[k] +  # Sum of random walk terms on level.
  #    slope[T] * k         # Contribution from last known slope.
  #    (k - 1) * s[0] +     # Contributions from random walk terms on slope.
  #    (k - 2) * s[1] +
  #    ... +
  #    1 * s[k - 1])
  # which has variance of
  #   (level_scale**2 * k +
  #    slope_scale**2 * ( (k - 1)**2 +
  #                       (k - 2)**2 +
  #                       ... + 1 ))
  # Here the `slope_scale` coefficient is the `k - 1`th square pyramidal
  # number [1], which is given by
  #   (k - 1) * k * (2 * k - 1) / 6.
  #
  # [1] https://en.wikipedia.org/wiki/Square_pyramidal_number
  level_variance = draws.level_scale[..., tf.newaxis]**2 * steps_ahead
  slope_variance = draws.slope_scale[..., tf.newaxis]**2 * (
      (steps_ahead - 1) * steps_ahead * (2 * steps_ahead - 1)) / 6.
  noise_variance = draws.observation_noise_scale[..., tf.newaxis]**2

  y_scale = (original_scale *
             tf.sqrt(noise_variance + level_variance + slope_variance))

  # Uniform mixture over the (thinned) posterior draws; the draw dimension is
  # moved last so it becomes the mixture-component dimension.
  num_posterior_draws = prefer_static.shape(y_mean)[0]
  uniform_mixture = tfd.Categorical(
      logits=tf.zeros([num_posterior_draws], dtype=y_mean.dtype))
  per_draw_normals = tfd.Normal(
      loc=dist_util.move_dimension(y_mean, 0, -1),
      scale=dist_util.move_dimension(y_scale, 0, -1))
  return tfd.MixtureSameFamily(
      mixture_distribution=uniform_mixture,
      components_distribution=per_draw_normals)
def __init__(self,
             mean_direction,
             concentration,
             validate_args=False,
             allow_nan_stats=True,
             name='VonMisesFisher'):
  """Creates a new `VonMisesFisher` instance.

  Args:
    mean_direction: Floating-point `Tensor` with shape [B1, ... Bn, D].
      A unit vector indicating the mode of the distribution, or the
      unit-normalized direction of the mean. (This is *not* in general the
      mean of the distribution; the mean is not generally in the support of
      the distribution.) NOTE: `D` is currently restricted to <= 5.
    concentration: Floating-point `Tensor` having batch shape [B1, ... Bn]
      broadcastable with `mean_direction`. The level of concentration of
      samples around the `mean_direction`. `concentration=0` indicates a
      uniform distribution over the unit hypersphere, and `concentration=+inf`
      indicates a `Deterministic` distribution (delta function) at
      `mean_direction`.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`,
      statistics (e.g., mean, mode, variance) use the value "`NaN`" to
      indicate the result is undefined. When `False`, an exception is raised
      if one or more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    ValueError: For known-bad arguments, i.e. unsupported event dimension.
  """
  parameters = dict(locals())
  with tf.name_scope(name, values=[mean_direction, concentration]) as name:
    dtype = dtype_util.common_dtype([mean_direction, concentration],
                                    tf.float32)
    mean_direction = tf.convert_to_tensor(
        mean_direction, name='mean_direction', dtype=dtype)
    concentration = tf.convert_to_tensor(
        concentration, name='concentration', dtype=dtype)

    # Runtime checks are only built when validation was requested.
    assertions = []
    if validate_args:
      assertions = [
          tf.assert_non_negative(
              concentration,
              message='`concentration` must be non-negative'),
          tf.assert_greater(
              tf.shape(mean_direction)[-1], 1,
              message='`mean_direction` may not have scalar event shape'),
          tf.assert_near(
              1., tf.linalg.norm(mean_direction, axis=-1),
              message='`mean_direction` must be unit-length'),
      ]

    # Statically known size of the event dimension, or `None` if unknown.
    static_event_dim = mean_direction.shape.with_rank_at_least(1)[-1].value
    if static_event_dim is not None:
      if static_event_dim > 5:
        raise ValueError('vMF ndims > 5 is not currently supported')
    elif validate_args:
      # The size is only known at run time, so defer the check to an op.
      assertions += [tf.assert_less_equal(
          tf.shape(mean_direction)[-1], 5,
          message='vMF ndims > 5 is not currently supported')]

    with tf.control_dependencies(assertions):
      self._mean_direction = tf.identity(mean_direction)
      self._concentration = tf.identity(concentration)
    tf.assert_same_float_dtype([self._mean_direction, self._concentration])

    # mean_direction is always reparameterized.
    # concentration is only for event_dim==3, via an inversion sampler.
    if static_event_dim == 3:
      reparameterization_type = reparameterization.FULLY_REPARAMETERIZED
    else:
      reparameterization_type = reparameterization.NOT_REPARAMETERIZED

    super(VonMisesFisher, self).__init__(
        dtype=self._concentration.dtype,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        reparameterization_type=reparameterization_type,
        parameters=parameters,
        graph_parents=[self._mean_direction, self._concentration],
        name=name)
def __init__(self,
             num_timesteps,
             design_matrix,
             drift_scale,
             initial_state_prior,
             observation_noise_scale=0.,
             name=None,
             **linear_gaussian_ssm_kwargs):
  """State space model for a dynamic linear regression.

  Args:
    num_timesteps: Scalar `int` `Tensor` number of timesteps to model
      with this distribution.
    design_matrix: float `Tensor` of shape `concat([batch_shape,
      [num_timesteps, num_features]])`.
    drift_scale: Scalar (any additional dimensions are treated as batch
      dimensions) `float` `Tensor` indicating the standard deviation of the
      latent state transitions.
    initial_state_prior: instance of `tfd.MultivariateNormal`
      representing the prior distribution on latent states.  Must have
      event shape `[num_features]`.
    observation_noise_scale: Scalar (any additional dimensions are
      treated as batch dimensions) `float` `Tensor` indicating the standard
      deviation of the observation noise.
      Default value: `0.`.
    name: Python `str` name prefixed to ops created by this class.
      Default value: 'DynamicLinearRegressionStateSpaceModel'.
    **linear_gaussian_ssm_kwargs: Optional additional keyword arguments
      to the base `tfd.LinearGaussianStateSpaceModel` constructor.
  """
  # Record the call arguments, flattening the extra keyword arguments into
  # the same dict instead of keeping them under their own key.
  parameters = dict(locals())
  parameters.update(linear_gaussian_ssm_kwargs)
  parameters.pop('linear_gaussian_ssm_kwargs')

  with tf.name_scope(
      name or 'DynamicLinearRegressionStateSpaceModel') as name:
    dtype = dtype_util.common_dtype(
        [design_matrix, drift_scale, initial_state_prior])

    design_matrix = tf.convert_to_tensor(
        value=design_matrix, name='design_matrix', dtype=dtype)
    # Time-major copy so a timestep can be selected with a plain gather.
    time_major_design = distribution_util.move_dimension(
        design_matrix, -2, 0)

    drift_scale = tf.convert_to_tensor(
        value=drift_scale, name='drift_scale', dtype=dtype)
    observation_noise_scale = tf.convert_to_tensor(
        value=observation_noise_scale,
        name='observation_noise_scale',
        dtype=dtype)

    num_features = prefer_static.shape(design_matrix)[-1]

    def observation_matrix_fn(t):
      """Wraps the design-matrix row for step `t` as a one-row operator."""
      row_at_t = tf.gather(time_major_design, t)
      return tf.linalg.LinearOperatorFullMatrix(
          row_at_t[..., tf.newaxis, :], name='observation_matrix')

    self._drift_scale = drift_scale
    self._observation_noise_scale = observation_noise_scale

    # Identity transition with isotropic noise of scale `drift_scale`.
    transition_matrix = tf.linalg.LinearOperatorIdentity(
        num_rows=num_features, dtype=dtype, name='transition_matrix')
    transition_noise = tfd.MultivariateNormalDiag(
        scale_diag=(drift_scale[..., tf.newaxis] *
                    tf.ones([num_features], dtype=dtype)),
        name='transition_noise')
    observation_noise = tfd.MultivariateNormalDiag(
        scale_diag=observation_noise_scale[..., tf.newaxis],
        name='observation_noise')

    super(DynamicLinearRegressionStateSpaceModel, self).__init__(
        num_timesteps=num_timesteps,
        transition_matrix=transition_matrix,
        transition_noise=transition_noise,
        observation_matrix=observation_matrix_fn,
        observation_noise=observation_noise,
        initial_state_prior=initial_state_prior,
        name=name,
        **linear_gaussian_ssm_kwargs)
    self._parameters = parameters
def testCommonDtypeFromLinop(self):
  """Checks `common_dtype` reports a LinearOperator's dtype over the hint."""
  half_precision_linop = tf.linalg.LinearOperatorDiag(tf.ones(3, tf.float16))
  inferred_dtype = dtype_util.common_dtype(
      [half_precision_linop], preferred_dtype=tf.float32)
  self.assertEqual(tf.float16, inferred_dtype)
def __init__(self,
             design_matrix,
             drift_scale_prior=None,
             initial_weights_prior=None,
             observed_time_series=None,
             name=None):
  """Specify a dynamic linear regression.

  Args:
    design_matrix: float `Tensor` of shape `concat([batch_shape,
      [num_timesteps, num_features]])`.
    drift_scale_prior: instance of `tfd.Distribution` specifying a prior on
      the `drift_scale` parameter. If `None`, a heuristic default prior is
      constructed based on the provided `observed_time_series`.
      Default value: `None`.
    initial_weights_prior: instance of `tfd.MultivariateNormal` representing
      the prior distribution on the latent states (the regression weights).
      Must have event shape `[num_features]`. If `None`, a weakly-informative
      Normal(0., 10.) prior is used.
      Default value: `None`.
    observed_time_series: `float` `Tensor` of shape `batch_shape + [T, 1]`
      (omitting the trailing unit dimension is also supported when `T > 1`),
      specifying an observed time series. Any priors not explicitly set will
      be given default values according to the scale of the observed time
      series (or batch of time series). It also sets the scale of the
      bijector applied to `drift_scale`. May optionally be an instance of
      `tfp.sts.MaskedTimeSeries`, which includes a mask `Tensor` to specify
      timesteps with missing observations.
      Default value: `None`.
    name: Python `str` for the name of this component.
      Default value: 'DynamicLinearRegression'.
  """
  with tf.name_scope(name or 'DynamicLinearRegression') as name:
    dtype = dtype_util.common_dtype(
        [design_matrix, drift_scale_prior, initial_weights_prior])

    num_features = prefer_static.shape(design_matrix)[-1]

    # Default to a weakly-informative Normal(0., 10.) for the initial state.
    if initial_weights_prior is None:
      initial_weights_prior = tfd.MultivariateNormalDiag(
          scale_diag=10. * tf.ones([num_features], dtype=dtype))

    # `observed_stddev` feeds both the default prior and the `drift_scale`
    # bijector below, so it must be computed even when the caller supplies
    # `drift_scale_prior`. Computing it only in the default-prior branch
    # left it unbound in that case.
    if observed_time_series is None:
      observed_stddev = tf.constant(1.0, dtype=dtype)
    else:
      _, observed_stddev, _ = sts_util.empirical_statistics(
          observed_time_series)

    # Heuristic default priors. Overriding these may dramatically
    # change inference performance and results.
    if drift_scale_prior is None:
      drift_scale_prior = tfd.LogNormal(
          loc=tf.math.log(.05 * observed_stddev),
          scale=3.,
          name='drift_scale_prior')

    self._initial_state_prior = initial_weights_prior
    self._design_matrix = design_matrix

    super(DynamicLinearRegression, self).__init__(
        parameters=[
            Parameter(
                'drift_scale',
                drift_scale_prior,
                tfb.Chain([
                    tfb.Scale(scale=observed_stddev),
                    tfb.Softplus()
                ]))
        ],
        latent_size=num_features,
        name=name)
def __init__(self,
             loc,
             scale,
             concentration,
             rate,
             validate_args=False,
             allow_nan_stats=True,
             name="NormalGamma"):
  """Initializes a batch of Normal-Gamma distributions.

  Args:
    loc: Floating point tensor; the means of the distribution(s).
    scale: Floating point tensor; the stddevs of the distribution(s).
      Must contain only positive values.
      (`scale = sqrt(lambda)` according to the wikipedia article)
    concentration: Floating point tensor, the concentration params of the
      distribution(s). Must contain only positive values.
    rate: Floating point tensor, the inverse scale params of the
      distribution(s). Must contain only positive values.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    TypeError: if `concentration` and `rate` are different dtypes.
  """
  parameters = dict(locals())
  with tf.name_scope(name, values=[loc, scale, concentration, rate]):
    dtype = dtype_util.common_dtype(
        [loc, scale, concentration, rate], preferred_dtype=tf.float32)
    loc = tf.convert_to_tensor(loc, name="loc", dtype=dtype)
    scale = tf.convert_to_tensor(scale, name="scale", dtype=dtype)
    concentration = tf.convert_to_tensor(
        concentration, name="concentration", dtype=dtype)
    rate = tf.convert_to_tensor(rate, name="rate", dtype=dtype)

    # Positivity checks are only attached when validation was requested.
    positivity_checks = []
    if validate_args:
      positivity_checks = [
          tf.assert_positive(scale),
          tf.assert_positive(concentration),
          tf.assert_positive(rate),
      ]
    with tf.control_dependencies(positivity_checks):
      self._loc = tf.identity(loc)
      self._scale = tf.identity(scale)
      self._concentration = tf.identity(concentration)
      self._rate = tf.identity(rate)

    stored_params = [
        self._loc, self._scale, self._concentration, self._rate]
    tf.assert_same_float_dtype(stored_params)
    # the coefficient for the precision
    self._lambda = tf.square(self._scale)

  super(NormalGamma, self).__init__(
      dtype=self._loc.dtype,
      validate_args=validate_args,
      allow_nan_stats=allow_nan_stats,
      reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
      parameters=parameters,
      graph_parents=[self._loc, self._scale, self._concentration,
                     self._rate],
      name=name)
def __init__(self,
             loc=None,
             scale_diag=None,
             scale_identity_multiplier=None,
             skewness=None,
             tailweight=None,
             distribution=None,
             validate_args=False,
             allow_nan_stats=True,
             name='VectorSinhArcsinhDiag'):
  """Construct VectorSinhArcsinhDiag distribution on `R^k`.

  The arguments `scale_diag` and `scale_identity_multiplier` combine to
  define the diagonal `scale` referred to in this class docstring:

  ```none
  scale = diag(scale_diag + scale_identity_multiplier * ones(k))
  ```

  The `batch_shape` is the broadcast shape between `loc` and `scale`
  arguments.

  The `event_shape` is given by last dimension of the matrix implied by
  `scale`. The last dimension of `loc` (if provided) must broadcast with
  this.

  Additional leading dimensions (if any) will index batches.

  Args:
    loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
      implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
      `b >= 0` and `k` is the event size.
    scale_diag: Non-zero, floating-point `Tensor` representing a diagonal
      matrix added to `scale`. May have shape `[B1, ..., Bb, k]`, `b >= 0`,
      and characterizes `b`-batches of `k x k` diagonal matrices added to
      `scale`. When both `scale_identity_multiplier` and `scale_diag` are
      `None` then `scale` is the `Identity`.
    scale_identity_multiplier: Non-zero, floating-point `Tensor` representing
      a scale-identity-matrix added to `scale`. May have shape
      `[B1, ..., Bb]`, `b >= 0`, and characterizes `b`-batches of scale
      `k x k` identity matrices added to `scale`. When both
      `scale_identity_multiplier` and `scale_diag` are `None` then `scale`
      is the `Identity`.
    skewness: Skewness parameter. Floating-point `Tensor` with shape
      broadcastable with `event_shape`. `None` is treated as `0.`.
    tailweight: Tailweight parameter. Floating-point `Tensor` with shape
      broadcastable with `event_shape`. `None` is treated as `1.`.
    distribution: `tf.Distribution`-like instance. Distribution from which `k`
      iid samples are used as input to transformation `F`. Default is
      `tfd.Normal(loc=0., scale=1.)`.
      Must be a scalar-batch, scalar-event distribution. Typically
      `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
      a function of non-trainable parameters. WARNING: If you backprop through
      a VectorSinhArcsinhDiag sample and `distribution` is not
      `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then
      the gradient will be incorrect!
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`,
      statistics (e.g., mean, mode, variance) use the value '`NaN`' to
      indicate the result is undefined. When `False`, an exception is raised
      if one or more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    ValueError: if at most `scale_identity_multiplier` is specified.
  """
  parameters = dict(locals())

  with tf.name_scope(name) as name:
    dtype = dtype_util.common_dtype(
        [loc, scale_diag, scale_identity_multiplier, skewness, tailweight],
        tf.float32)
    if loc is not None:
      loc = tf.convert_to_tensor(loc, name='loc', dtype=dtype)
    if tailweight is None:
      tailweight = 1.
    if skewness is None:
      skewness = 0.

    # Recall, with Z a random variable,
    #   Y := loc + C * F(Z),
    #   F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight )
    #   F_0(Z) := Sinh( Arcsinh(Z) * tailweight )
    #   C := 2 * scale / F_0(2)

    # Construct shapes and 'scale' out of the scale_* and loc kwargs.
    # scale_linop is only an intermediary to:
    #  1. get shapes from looking at loc and the two scale args.
    #  2. combine scale_diag with scale_identity_multiplier, which gives us
    #     'scale', which in turn gives us 'C'.
    scale_linop = _make_diag_scale(
        loc=loc,
        scale_diag=scale_diag,
        scale_identity_multiplier=scale_identity_multiplier,
        validate_args=False,
        assert_positive=False,
        dtype=dtype)
    batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
        loc, scale_linop)
    # scale_linop.diag_part() is efficient since it is a diag type linop.
    scale_diag_part = scale_linop.diag_part()
    # From here on, use the dtype the scale actually ended up with.
    dtype = scale_diag_part.dtype

    if distribution is not None:
      asserts = distribution_util.maybe_check_scalar_distribution(
          distribution, dtype, validate_args)
      if asserts:
        # Tie the checks to a tensor that is consumed downstream.
        scale_diag_part = distribution_util.with_dependencies(
            asserts, scale_diag_part)
    else:
      distribution = normal.Normal(
          loc=tf.zeros([], dtype=dtype),
          scale=tf.ones([], dtype=dtype),
          allow_nan_stats=allow_nan_stats)

    # Make the SAS bijector, 'F'.
    skewness = tf.convert_to_tensor(skewness, dtype=dtype, name='skewness')
    tailweight = tf.convert_to_tensor(
        tailweight, dtype=dtype, name='tailweight')
    sinh_arcsinh = sinh_arcsinh_bijector.SinhArcsinh(
        skewness=skewness, tailweight=tailweight)
    shift_and_scale = affine_bijector.Affine(
        shift=loc, scale_diag=scale_diag_part, validate_args=validate_args)

    # Listed in the original order: the affine bijector, then `F`.
    bijector = chain_bijector.Chain([shift_and_scale, sinh_arcsinh])

    super(VectorSinhArcsinhDiag, self).__init__(
        distribution=distribution,
        bijector=bijector,
        batch_shape=batch_shape,
        event_shape=event_shape,
        validate_args=validate_args,
        name=name)
  self._parameters = parameters
  self._loc = loc
  self._scale = scale_linop
  self._tailweight = tailweight
  self._skewness = skewness
def __init__(self,
             shift=None,
             scale=None,
             log_scale=None,
             validate_args=False,
             name='affine_scalar'):
  """Instantiates the `AffineScalar` bijector.

  This `Bijector` is initialized with `shift` `Tensor` and `scale` arguments,
  giving the forward operation:

  ```none
  Y = g(X) = scale * X + shift
  ```

  Alternatively, you can specify `log_scale` instead of `scale` for slightly
  better numerics with tiny scales. Note that when using `log_scale` it is
  currently impossible to specify a negative scale.

  If `scale` or `log_scale` are not specified, then the bijector has the
  semantics of `scale = 1.`. Similarly, if `shift` is not specified, then the
  bijector has the semantics of `shift = 0.`.

  Args:
    shift: Floating-point `Tensor`. If this is set to `None`, no shift is
      applied.
    scale: Floating-point `Tensor`. If this is set to `None`, no scale is
      applied. This should not be set if `log_scale` is set.
    log_scale: Floating-point `Tensor`. Logarithm of the scale. If this is set
      to `None`, no scale is applied. This should not be set if `scale` is
      set.
    validate_args: Python `bool` indicating whether arguments should be
      checked for correctness.
    name: Python `str` name given to ops managed by this object.

  Raises:
    ValueError: If both `scale` and `log_scale` are specified.
  """
  with tf.name_scope(name) as name:
    dtype = dtype_util.common_dtype(
        [shift, scale, log_scale], dtype_hint=tf.float32)

    # `scale` and `log_scale` are two spellings of the same quantity.
    if not (scale is None or log_scale is None):
      raise ValueError('At most one of `scale` and `log_scale` should be '
                       'specified')

    def _to_tensor(value, label):
      """Converts one constructor argument using the shared dtype."""
      return tensor_util.convert_nonref_to_tensor(
          value, dtype=dtype, name=label)

    self._shift = _to_tensor(shift, 'shift')
    self._scale = _to_tensor(scale, 'scale')
    self._log_scale = _to_tensor(log_scale, 'log_scale')

    super(AffineScalar, self).__init__(
        forward_min_event_ndims=0,
        is_constant_jacobian=True,
        validate_args=validate_args,
        dtype=dtype,
        name=name)
def _make_diag_scale(loc=None,
                     scale_diag=None,
                     scale_identity_multiplier=None,
                     shape_hint=None,
                     validate_args=False,
                     assert_positive=False,
                     name=None,
                     dtype=None):
  """Creates a LinearOperator representing a diagonal matrix.

  Args:
    loc: Floating-point `Tensor`. This is used for inferring shape in the case
      where only `scale_identity_multiplier` is set.
    scale_diag: Floating-point `Tensor` representing the diagonal matrix.
      `scale_diag` has shape [N1, N2, ...  k], which represents a k x k
      diagonal matrix. When `None` no diagonal term is added to the
      LinearOperator.
    scale_identity_multiplier: floating point rank 0 `Tensor` representing a
      scaling done to the identity matrix. When both
      `scale_identity_multiplier` and `scale_diag` are `None`, an identity
      operator is returned.
    shape_hint: scalar integer `Tensor` representing a hint at the dimension
      of the identity matrix when only `scale_identity_multiplier` is set.
    validate_args: Python `bool` indicating whether arguments should be
      checked for correctness.
    assert_positive: Python `bool` indicating whether LinearOperator should be
      checked for being positive definite.
    name: Python `str` name given to ops managed by this object.
    dtype: TF `DType` to prefer when converting args to `Tensor`s. Else, we
      fall back to a compatible dtype across all of `loc`, `scale_diag`, and
      `scale_identity_multiplier`.

  Returns:
    `LinearOperator` representing a diagonal matrix: a `LinearOperatorDiag`
    when `scale_diag` is given, otherwise a `LinearOperatorIdentity` or
    `LinearOperatorScaledIdentity`.

  Raises:
    ValueError: If `scale_diag` is `None` and `loc` and `shape_hint` are both
      `None`.
  """
  with tf.name_scope(name or 'make_diag_scale'):
    if dtype is None:
      dtype = dtype_util.common_dtype(
          [loc, scale_diag, scale_identity_multiplier],
          dtype_hint=tf.float32)
    loc = tensor_util.convert_nonref_to_tensor(loc, name='loc', dtype=dtype)
    scale_diag = tensor_util.convert_nonref_to_tensor(
        scale_diag, name='scale_diag', dtype=dtype)
    scale_identity_multiplier = tensor_util.convert_nonref_to_tensor(
        scale_identity_multiplier,
        name='scale_identity_multiplier',
        dtype=dtype)

    has_multiplier = scale_identity_multiplier is not None

    # Case 1: an explicit diagonal, optionally shifted by the multiplier.
    if scale_diag is not None:
      diag = scale_diag
      if has_multiplier:
        diag = diag + scale_identity_multiplier[..., tf.newaxis]
      return tf.linalg.LinearOperatorDiag(
          diag=diag,
          is_non_singular=True,
          is_self_adjoint=True,
          is_positive_definite=assert_positive)

    # Cases 2 and 3 need a matrix size from `shape_hint` or `loc`.
    if not (loc is not None or shape_hint is not None):
      raise ValueError('Cannot infer `event_shape` unless `loc` or '
                       '`shape_hint` is specified.')

    if shape_hint is not None:
      num_rows = shape_hint
    else:
      # Prefer the static last dimension of `loc`; otherwise read it at
      # run time.
      num_rows = tf.compat.dimension_value(loc.shape[-1])
      if num_rows is None:
        num_rows = tf.shape(loc)[-1]

    # Case 2: scaled identity.
    if has_multiplier:
      return tf.linalg.LinearOperatorScaledIdentity(
          num_rows=num_rows,
          multiplier=scale_identity_multiplier,
          is_non_singular=True,
          is_self_adjoint=True,
          is_positive_definite=assert_positive,
          assert_proper_shapes=validate_args)

    # Case 3: plain identity.
    return tf.linalg.LinearOperatorIdentity(
        num_rows=num_rows,
        dtype=dtype,
        is_self_adjoint=True,
        is_positive_definite=True,
        assert_proper_shapes=validate_args)
def __init__(self,
             df,
             kernel,
             index_points=None,
             mean_fn=None,
             observation_noise_variance=0.,
             marginal_fn=None,
             cholesky_fn=None,
             jitter=1e-6,
             validate_args=False,
             allow_nan_stats=False,
             name='StudentTProcess'):
  """Instantiate a StudentTProcess Distribution.

  Args:
    df: Positive Floating-point `Tensor` representing the degrees of freedom.
      Must be greater than 2.
    kernel: `PositiveSemidefiniteKernel`-like instance representing the
      TP's covariance function.
    index_points: `float` `Tensor` representing finite (batch of) vector(s)
      of points in the index set over which the TP is defined. Shape has the
      form `[b1, ..., bB, e, f1, ..., fF]` where `F` is the number of feature
      dimensions and must equal `kernel.feature_ndims` and `e` is the number
      (size) of index points in each batch. The batch shape must be
      broadcastable with `kernel.batch_shape` and any batch dims yielded by
      `mean_fn`.
    mean_fn: Python `callable` that acts on `index_points` to produce a
      (batch of) vector(s) of mean values at `index_points`.
      Default value: `None` implies a constant zero function (a function
      returning `tf.zeros([1])` of the common dtype).
    observation_noise_variance: `float` `Tensor` representing (batch of)
      scalar variance(s) of the observation noise. If batched, the batch
      shape must be broadcastable with the shapes of all other batched
      parameters.
      Default value: `0.`
    marginal_fn: A Python callable that takes a location, covariance matrix,
      optional `validate_args`, `allow_nan_stats` and `name` arguments, and
      returns the marginal `tfd.Distribution`.
      Default value: `None`, in which case a Cholesky-factorizing function
      is created with `make_cholesky_factored_marginal_fn`.
    cholesky_fn: Callable which takes a single (batch) matrix argument and
      returns a Cholesky-like lower triangular factor.
      Default value: `None`, in which case `make_cholesky_with_jitter_fn` is
      used with the `jitter` parameter. At most one of `cholesky_fn` and
      `marginal_fn` should be set.
    jitter: `float` scalar `Tensor` added to the diagonal of the covariance
      matrix to ensure positive definiteness. Only used to build the default
      `cholesky_fn`.
      Default value: `1e-6`.
    validate_args: Python `bool`. When `True` distribution parameters are
      checked for validity despite possibly degrading runtime performance.
      When `False` invalid inputs may silently render incorrect outputs.
      Default value: `False`.
    allow_nan_stats: Python `bool`. When `True`, statistics (e.g., mean,
      mode, variance) use the value "`NaN`" to indicate the result is
      undefined. When `False`, an exception is raised if one or more of the
      statistic's batch members are undefined.
      Default value: `False`.
    name: Python `str` name prefixed to Ops created by this class.
      Default value: "StudentTProcess".

  Raises:
    ValueError: if `mean_fn` is not `None` and is not callable.
    ValueError: if both `marginal_fn` and `cholesky_fn` are set.
  """
  # Must run before any other local is bound so that only the constructor
  # arguments are captured.
  parameters = dict(locals())
  with tf.name_scope(name) as name:
    dtype = dtype_util.common_dtype(
        [df, index_points, observation_noise_variance, jitter], tf.float32)

    df = tensor_util.convert_nonref_to_tensor(df, dtype=dtype, name='df')
    observation_noise_variance = tensor_util.convert_nonref_to_tensor(
        observation_noise_variance,
        dtype=dtype,
        name='observation_noise_variance')
    index_points = tensor_util.convert_nonref_to_tensor(
        index_points, dtype=dtype, name='index_points')
    jitter = tensor_util.convert_nonref_to_tensor(
        jitter, dtype=dtype, name='jitter')

    self._kernel = kernel
    self._index_points = index_points

    if mean_fn is None:
      # Constant-zero mean in the common dtype computed above.
      mean_fn = lambda x: tf.zeros([1], dtype=dtype)
    elif not callable(mean_fn):
      raise ValueError('`mean_fn` must be a Python callable')

    self._df = df
    self._observation_noise_variance = observation_noise_variance
    self._mean_fn = mean_fn
    self._jitter = jitter
    self._cholesky_fn = cholesky_fn

    user_marginal = marginal_fn is not None
    if user_marginal and cholesky_fn is not None:
      raise ValueError(
          'At most one of `marginal_fn` and `cholesky_fn` should be set.')

    if user_marginal:
      self._marginal_fn = marginal_fn
    else:
      # Build the marginal from a Cholesky function, creating the default
      # jittered Cholesky when the caller did not provide one.
      if self._cholesky_fn is None:
        self._cholesky_fn = cholesky_util.make_cholesky_with_jitter_fn(
            jitter)
      self._marginal_fn = make_cholesky_factored_marginal_fn(
          self._cholesky_fn)

    with tf.name_scope('init'):
      super(StudentTProcess, self).__init__(
          dtype=dtype,
          reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
          validate_args=validate_args,
          allow_nan_stats=allow_nan_stats,
          parameters=parameters,
          name=name)
def pivoted_cholesky(matrix, max_rank, diag_rtol=1e-3, name=None):
  """Computes the (partial) pivoted cholesky decomposition of `matrix`.

  The pivoted Cholesky is a low rank approximation of the Cholesky decomposition
  of `matrix`, i.e. as described in [(Harbrecht et al., 2012)][1]. The
  currently-worst-approximated diagonal element is selected as the pivot at each
  iteration. This yields from a `[B1...Bn, N, N]` shaped `matrix` a `[B1...Bn,
  N, K]` shaped rank-`K` approximation `lr` such that `lr @ lr.T ~= matrix`.
  Note that, unlike the Cholesky decomposition, `lr` is not triangular even in
  a rectangular-matrix sense. However, under a permutation it could be made
  triangular (it has one more zero in each column as you move to the right).

  Such a matrix can be useful as a preconditioner for conjugate gradient
  optimization, i.e. as in [(Wang et al. 2019)][2], as matmuls and solves can be
  cheaply done via the Woodbury matrix identity, as implemented by
  `tf.linalg.LinearOperatorLowRankUpdate`.

  Implementation notes (as visible in this function):

  * `max_rank` is clipped to the size of the last dimension of `matrix`.
  * The loop always runs at least one iteration (when `max_rank > 0`) and then
    continues while the L1 norm of the remaining (residual) diagonal, divided
    by the largest original diagonal entry, exceeds `diag_rtol`. The maximum of
    that ratio over the whole batch is used, so all batch members iterate
    until the worst one satisfies the tolerance or `max_rank` is reached.
  * The size of the last dimension of the result is not set statically.

  Args:
    matrix: Floating point `Tensor` batch of symmetric, positive definite
      matrices. Its rank must be statically known.
    max_rank: Scalar `int` `Tensor`, the rank at which to truncate the
      approximation.
    diag_rtol: Scalar floating point `Tensor` (same dtype as `matrix`). If the
      errors of all diagonal elements of `lr @ lr.T` are each lower than
      `element * diag_rtol`, iteration is permitted to terminate early.
    name: Optional name for the op.
      Default value: `None` (i.e., 'pivoted_cholesky').

  Returns:
    lr: Low rank pivoted Cholesky approximation of `matrix`.

  Raises:
    NotImplementedError: If the rank of `matrix` is not known statically.

  #### References

  [1]: H Harbrecht, M Peters, R Schneider. On the low-rank approximation by the
       pivoted Cholesky decomposition. _Applied numerical mathematics_,
       62(4):428-440, 2012.

  [2]: K. A. Wang et al. Exact Gaussian Processes on a Million Data Points.
       _arXiv preprint arXiv:1903.08114_, 2019.
       https://arxiv.org/abs/1903.08114
  """
  with tf.name_scope(name or 'pivoted_cholesky'):
    dtype = dtype_util.common_dtype([matrix, diag_rtol], dtype_hint=tf.float32)
    matrix = tf.convert_to_tensor(matrix, name='matrix', dtype=dtype)
    if tensorshape_util.rank(matrix.shape) is None:
      raise NotImplementedError(
          'Rank of `matrix` must be known statically')

    max_rank = tf.convert_to_tensor(max_rank, name='max_rank', dtype=tf.int64)
    # The approximation can never have more columns than `matrix` has rows.
    max_rank = tf.minimum(
        max_rank, prefer_static.shape(matrix, out_type=tf.int64)[-1])
    diag_rtol = tf.convert_to_tensor(diag_rtol, dtype=dtype, name='diag_rtol')
    matrix_diag = tf.linalg.diag_part(matrix)
    # matrix is P.D., therefore all matrix_diag > 0, so we don't need abs.
    orig_error = tf.reduce_max(matrix_diag, axis=-1)

    def cond(m, pchol, perm, matrix_diag):
      """Condition for `tf.while_loop` continuation.

      Continues while fewer than `max_rank` rows have been produced and either
      no iteration has run yet or the relative residual still exceeds
      `diag_rtol`.
      """
      del pchol
      del perm
      # `matrix_diag` here is the loop variable, i.e. the residual diagonal
      # after the updates applied so far.
      error = tf.linalg.norm(matrix_diag, ord=1, axis=-1)
      # Reduced over every batch member: one scalar decides for the batch.
      max_err = tf.reduce_max(error / orig_error)
      return (m < max_rank) & (tf.equal(m, 0) | (max_err > diag_rtol))

    # Everything except the trailing two (matrix) dimensions is batch.
    batch_dims = tensorshape_util.rank(matrix.shape) - 2
    def batch_gather(params, indices, axis=-1):
      # Per-batch-member gather along `axis`.
      return tf.gather(params, indices, axis=axis, batch_dims=batch_dims)

    def body(m, pchol, perm, matrix_diag):
      """Body of a single `tf.while_loop` iteration.

      Produces row `m` of `pchol`, updates the permutation `perm` and subtracts
      the newly explained part from the residual diagonal `matrix_diag`.
      """
      # Here is roughly a numpy, non-batched version of what's going to happen.
      # (See also Algorithm 1 of Harbrecht et al.)
      # 1: maxi = np.argmax(matrix_diag[perm[m:]]) + m
      # 2: maxval = matrix_diag[perm][maxi]
      # 3: perm[m], perm[maxi] = perm[maxi], perm[m]
      # 4: row = matrix[perm[m]][perm[m + 1:]]
      # 5: row -= np.sum(pchol[:m][perm[m + 1:]] * pchol[:m][perm[m]], axis=-2)
      # 6: pivot = np.sqrt(maxval); row /= pivot
      # 7: row = np.concatenate([[[pivot]], row], -1)
      # 8: matrix_diag[perm[m:]] -= row**2
      # 9: pchol[m, perm[m:]] = row

      # Find the maximal position of the (remaining) permuted diagonal.
      # Steps 1, 2 above.
      permuted_diag = batch_gather(matrix_diag, perm[..., m:])
      maxi = tf.argmax(
          permuted_diag, axis=-1, output_type=tf.int64)[..., tf.newaxis]
      maxval = batch_gather(permuted_diag, maxi)
      # `maxi` was relative to the slice starting at `m`; make it absolute.
      maxi = maxi + m
      maxval = maxval[..., 0]
      # Update perm: Swap perm[...,m] with perm[...,maxi]. Step 3 above.
      # NOTE(review): `_swap_m_with_i` is defined elsewhere in this file.
      perm = _swap_m_with_i(perm, m, maxi)
      # Step 4.
      row = batch_gather(matrix, perm[..., m:m + 1], axis=-2)
      row = batch_gather(row, perm[..., m + 1:])
      # Step 5.
      prev_rows = pchol[..., :m, :]
      prev_rows_perm_m_onward = batch_gather(prev_rows, perm[..., m + 1:])
      prev_rows_pivot_col = batch_gather(prev_rows, perm[..., m:m + 1])
      row -= tf.reduce_sum(
          prev_rows_perm_m_onward * prev_rows_pivot_col,
          axis=-2)[..., tf.newaxis, :]
      # Step 6.
      pivot = tf.sqrt(maxval)[..., tf.newaxis, tf.newaxis]
      # Step 7.
      row = tf.concat([pivot, row / pivot], axis=-1)
      # TODO(b/130899118): Pad grad fails with int64 paddings.
      # Step 8.
      # Left-pad the last dimension by `m` entries; no padding elsewhere.
      paddings = tf.concat([
          tf.zeros([prefer_static.rank(pchol) - 1, 2], dtype=tf.int32),
          [[tf.cast(m, tf.int32), 0]]
      ], axis=0)
      diag_update = tf.pad(row**2, paddings=paddings)[..., 0, :]
      # NOTE(review): `_invert_permutation` is defined elsewhere in this file;
      # presumably it returns the inverse of `perm` so values can be scattered
      # back to unpermuted positions -- confirm.
      reverse_perm = _invert_permutation(perm)
      matrix_diag -= batch_gather(diag_update, reverse_perm)
      # Step 9.
      row = tf.pad(row, paddings=paddings)
      # TODO(bjp): Defer the reverse permutation all-at-once at the end?
      row = batch_gather(row, reverse_perm)
      pchol_shape = pchol.shape
      # Replace row `m` of `pchol` by concatenating around it.
      pchol = tf.concat([pchol[..., :m, :], row, pchol[..., m + 1:, :]],
                        axis=-2)
      # The concat loses static shape information; restore it.
      tensorshape_util.set_shape(pchol, pchol_shape)
      return m + 1, pchol, perm, matrix_diag

    m = np.int64(0)
    # Rows of `pchol` are the columns of the final result (transposed below).
    pchol = tf.zeros_like(matrix[..., :max_rank, :])
    matrix_shape = prefer_static.shape(matrix, out_type=tf.int64)
    # Start from the identity permutation for every batch member.
    perm = tf.broadcast_to(prefer_static.range(matrix_shape[-1]),
                           matrix_shape[:-1])
    _, pchol, _, _ = tf.while_loop(
        cond=cond, body=body, loop_vars=(m, pchol, perm, matrix_diag))
    pchol = tf.linalg.matrix_transpose(pchol)
    # Static shape: same leading dims as the diagonal, unknown last dimension.
    tensorshape_util.set_shape(
        pchol, tensorshape_util.concatenate(matrix_diag.shape, [None]))
    return pchol
def find_bins(x,
              edges,
              extend_lower_interval=False,
              extend_upper_interval=False,
              dtype=None,
              name=None):
  """Bin values into discrete intervals.

  Given `edges = [c0, ..., cK]`, defining intervals
  `I0 = [c0, c1)`, `I1 = [c1, c2)`, ..., `I_{K-1} = [c_{K-1}, cK]`,
  this function returns `bins`, such that:
  `edges[bins[i]] <= x[i] < edges[bins[i] + 1]`
  (the last interval also contains its right edge `cK`).

  Args:
    x: Numeric `N-D` `Tensor` with `N > 0`.
    edges: `Tensor` of same `dtype` as `x`. The first dimension indexes edges
      of intervals. Must either be `1-D` or have
      `x.shape[1:] == edges.shape[1:]`. If `rank(edges) > 1`, `edges[k]`
      designates a shape `edges.shape[1:]` `Tensor` of bin edges for the
      corresponding dimensions of `x`.
    extend_lower_interval: Python `bool`. If `True`, values below `c0` are
      assigned to the lowest bin, i.e. `I0` is extended to `(-inf, c1)`.
    extend_upper_interval: Python `bool`. If `True`, values above `cK` are
      assigned to the highest bin, i.e. `I_{K-1}` is extended to
      `[c_{K-1}, +inf)`.
    dtype: The output type (`int32`, `int64`, or a floating type).
      Default value: the common dtype of `x` and `edges`.
      This affects the output values when `x` is below/above the intervals
      (and the corresponding `extend_*` flag is `False`): for `int` types
      they are `-1` (below) and `edges.shape[0] - 1` (above); for `float`
      types they are `NaN`.
    name: A Python string name to prepend to created ops.
      Default value: 'find_bins'.

  Returns:
    bins: `Tensor` with same `shape` as `x` and `dtype`.
      Has whole number values. `bins[i] = k` means the `x[i]` falls into the
      `kth` bin, ie, `edges[bins[i]] <= x[i] < edges[bins[i] + 1]`.

  Raises:
    ValueError: If `edges.shape[0]` is determined to be less than 2.

  #### Examples

  Cut a `1-D` array

  ```python
  x = [0., 5., 6., 10., 20.]
  edges = [0., 5., 10.]
  tfp.stats.find_bins(x, edges)
  ==> [0., 1., 1., 1., np.nan]
  ```

  Cut `x` into its deciles

  ```python
  x = tf.random.uniform(shape=(100, 200))
  decile_edges = tfp.stats.quantiles(x, num_quantiles=10)
  bins = tfp.stats.find_bins(x, edges=decile_edges)
  bins.shape
  ==> (100, 200)
  tf.reduce_mean(tf.cast(bins == 0., tf.float32))
  ==> approximately 0.1
  tf.reduce_mean(tf.cast(bins == 1., tf.float32))
  ==> approximately 0.1
  ```

  """
  # Why are the edges indexed by the leftmost dim rather than the rightmost
  # (event) dim?
  # 1. Edges typically come from quantiles over samples, and the
  #    quantile/percentile functions put them in the leftmost (sample) dim.
  # 2. With event_shape = [5] each of the 5 events gets its own bin, so the
  #    bin index should not live in the event dim.
  with tf.compat.v1.name_scope(
      name, default_name='find_bins', values=[x, edges]):
    common_type = dtype_util.common_dtype([x, edges], dtype_hint=tf.float32)
    edges = tf.convert_to_tensor(value=edges, name='edges', dtype=common_type)
    x = tf.convert_to_tensor(value=x, name='x', dtype=common_type)

    static_num_edges = tf.compat.dimension_value(edges.shape[0])
    if static_num_edges is not None and static_num_edges < 2:
      raise ValueError(
          'First dimension of `edges` must have length > 1 to index 1 or '
          'more bin. Found: {}'.format(edges.shape))

    # 1-D edges are shared by every element of `x`: flatten `x` for the search
    # and restore its shape at the very end.
    must_flatten = edges.shape.ndims == 1 and x.shape.ndims > 1
    if must_flatten:
      original_x_shape = tf.shape(input=x)
      x = tf.reshape(x, [-1])

    out_dtype = tf.as_dtype(common_type if dtype is None else dtype)

    def _mask_outside(outside, fill_value, current):
      # Overwrite entries flagged by `outside` with `fill_value`.
      filler = tf.fill(tf.shape(input=x), tf.cast(fill_value, out_dtype))
      return tf.compat.v1.where(outside, filler, current)

    # `tf.searchsorted` searches along the last axis, so rotate the leading
    # (edge-index) dim to the end.
    x_rotated = distribution_util.rotate_transpose(x, shift=-1)
    edges_rotated = distribution_util.rotate_transpose(edges, shift=-1)

    if out_dtype in [tf.int32, tf.int64]:
      index_type = out_dtype
    else:
      index_type = None

    # Example:
    #   x     = [0., 1., 6., 10.]
    #   edges = [0., 5., 10.]
    #   ==> upper_index = [1, 1, 2, 3]
    upper_index_rotated = tf.searchsorted(
        sorted_sequence=edges_rotated,
        values=x_rotated,
        side='right',
        out_type=index_type)

    # Undo the rotation and move to the requested output dtype.
    upper_index = tf.cast(
        distribution_util.rotate_transpose(upper_index_rotated, shift=1),
        out_dtype)

    # Shift to zero-based bins and clamp into [0, num_edges - 2]; in the
    # example above this gives [0, 0, 1, 1].
    zero_based = upper_index - 1
    first_bin = tf.cast(0, out_dtype)
    last_bin = tf.cast(tf.shape(input=edges)[0] - 2, out_dtype)
    bins = tf.clip_by_value(zero_based, first_bin, last_bin)

    if not extend_lower_interval:
      low_fill = np.nan if out_dtype.is_floating else -1
      bins = _mask_outside(x < tf.expand_dims(edges[0], 0), low_fill, bins)

    if not extend_upper_interval:
      if out_dtype.is_floating:
        up_fill = np.nan
      else:
        up_fill = tf.shape(input=edges)[0] - 1
      bins = _mask_outside(x > tf.expand_dims(edges[-1], 0), up_fill, bins)

    if must_flatten:
      bins = tf.reshape(bins, original_x_shape)

    return bins
def __init__(self,
             temperature,
             logits=None,
             probs=None,
             validate_args=False,
             allow_nan_stats=True,
             name="RelaxedBernoulli"):
  """Construct RelaxedBernoulli distributions.

  The distribution is assembled as a `Logistic` base distribution with
  location `logits / temperature` and scale `1 / temperature`, pushed through
  a `Sigmoid` bijector.

  Args:
    temperature: An 0-D `Tensor`, representing the temperature of a set of
      RelaxedBernoulli distributions. The temperature should be positive;
      this is asserted only when `validate_args` is `True`.
    logits: An N-D `Tensor` representing the log-odds of a positive event.
      Each entry in the `Tensor` parametrizes an independent
      RelaxedBernoulli distribution where the probability of an event is
      sigmoid(logits). Only one of `logits` or `probs` should be passed in.
    probs: An N-D `Tensor` representing the probability of a positive event.
      Each entry in the `Tensor` parameterizes an independent Bernoulli
      distribution. Only one of `logits` or `probs` should be passed in.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    ValueError: If both `probs` and `logits` are passed, or if neither.
  """
  # Snapshot the constructor arguments before any other local is created.
  parameters = dict(locals())
  with tf.compat.v1.name_scope(
      name, values=[logits, probs, temperature]) as name:
    dtype = dtype_util.common_dtype([logits, probs, temperature], tf.float32)

    temperature_tensor = tf.convert_to_tensor(
        value=temperature, name="temperature", dtype=dtype)
    if validate_args:
      # Attach the positivity check so it runs whenever the temperature is
      # consumed.
      positivity_check = tf.compat.v1.assert_positive(temperature)
      with tf.control_dependencies([positivity_check]):
        temperature_tensor = tf.identity(temperature_tensor)
    self._temperature = temperature_tensor

    self._logits, self._probs = distribution_util.get_logits_and_probs(
        logits=logits, probs=probs, validate_args=validate_args, dtype=dtype)

    base_distribution = logistic.Logistic(
        self._logits / self._temperature,
        1. / self._temperature,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        name=name + "/Logistic")
    squash = sigmoid_bijector.Sigmoid(validate_args=validate_args)

    super(RelaxedBernoulli, self).__init__(
        distribution=base_distribution,
        bijector=squash,
        validate_args=validate_args,
        name=name)
  self._parameters = parameters
def __init__(self,
             loc=None,
             scale_diag=None,
             scale_identity_multiplier=None,
             validate_args=False,
             allow_nan_stats=True,
             name='MultivariateNormalDiag'):
  """Construct Multivariate Normal distribution on `R^k`.

  The `batch_shape` is the broadcast shape between `loc` and `scale`
  arguments.

  The `event_shape` is given by last dimension of the matrix implied by
  `scale`. The last dimension of `loc` (if provided) must broadcast with this.

  Recall that `covariance = scale @ scale.T`. A (non-batch) `scale` matrix is
  one of:

  ```none
  scale = diag(scale_diag)                      # `scale_diag` given
  scale = scale_identity_multiplier * eye(k)    # only the multiplier given
  scale = eye(k)                                # neither given
  ```

  where:

  * `scale_diag.shape = [k]`, and,
  * `scale_identity_multiplier.shape = []`.

  Additional leading dimensions (if any) will index batches.

  `scale_diag` and `scale_identity_multiplier` are mutually exclusive. When
  `scale_diag` is omitted, `k` is taken from the last dimension of `loc`, so
  `loc` is required in that case.

  Args:
    loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
      implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
      `b >= 0` and `k` is the event size. Must be specified when `scale_diag`
      is `None`.
    scale_diag: Non-zero, floating-point `Tensor` representing a diagonal
      matrix. May have shape `[B1, ..., Bb, k]`, `b >= 0`, and characterizes
      `b`-batches of `k x k` diagonal matrices.
    scale_identity_multiplier: Non-zero, floating-point `Tensor` representing
      a scaled-identity-matrix. May have shape `[B1, ..., Bb]`, `b >= 0`, and
      characterizes `b`-batches of scaled `k x k` identity matrices.
      Deprecated (per the error message below): combine it directly into
      `scale_diag` instead.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`,
      statistics (e.g., mean, mode, variance) use the value '`NaN`' to
      indicate the result is undefined. When `False`, an exception is raised
      if one or more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    ValueError: if both `scale_diag` and `scale_identity_multiplier` are
      specified.
    ValueError: if neither `loc` nor `scale_diag` is specified, so the event
      size cannot be inferred.
  """
  # Snapshot the constructor arguments before any other local is created.
  parameters = dict(locals())
  with tf.name_scope(name) as name:
    dtype = dtype_util.common_dtype(
        [loc, scale_diag, scale_identity_multiplier], dtype_hint=tf.float32)
    loc = tensor_util.convert_nonref_to_tensor(loc, name='loc', dtype=dtype)
    scale_diag = tensor_util.convert_nonref_to_tensor(
        scale_diag, name='scale_diag', dtype=dtype)

    if scale_diag is not None and scale_identity_multiplier is not None:
      raise ValueError(
          'Only one of `scale_diag` and `scale_identity_multiplier` is '
          'allowed. Furthermore, `scale_identity_multiplier` is deprecated; '
          'please combine it directly into `scale_diag` instead.')

    if scale_diag is not None:
      scale = tf.linalg.LinearOperatorDiag(
          diag=scale_diag,
          is_non_singular=True,
          is_self_adjoint=True,
          is_positive_definite=False)
    else:
      # Without `scale_diag` the event size can only come from `loc`. Fail
      # with a clear message instead of an `AttributeError` on `None`.
      if loc is None:
        raise ValueError(
            'Cannot infer `event_shape` unless `loc` or `scale_diag` is '
            'specified.')
      # Deprecated behavior; breaks variable-safety rules by calling
      # `tf.shape(loc)`.
      num_rows = tf.compat.dimension_value(loc.shape[-1])
      if num_rows is None:
        num_rows = tf.shape(loc)[-1]
      if scale_identity_multiplier is not None:
        scale_identity_multiplier = tensor_util.convert_nonref_to_tensor(
            scale_identity_multiplier,
            name='scale_identity_multiplier',
            dtype=dtype)
        scale = tf.linalg.LinearOperatorScaledIdentity(
            num_rows=num_rows,
            multiplier=scale_identity_multiplier,
            is_non_singular=True,
            is_self_adjoint=True,
            is_positive_definite=False,
            assert_proper_shapes=False)
      else:
        scale = tf.linalg.LinearOperatorIdentity(
            num_rows=num_rows,
            dtype=dtype,
            is_self_adjoint=True,
            is_positive_definite=True,
            assert_proper_shapes=validate_args)

    super(MultivariateNormalDiag, self).__init__(
        loc=loc,
        scale=scale,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        name=name)
    self._parameters = parameters
def __init__(self,
             low=None,
             high=None,
             hinge_softness=None,
             validate_args=False,
             name='soft_clip'):
  """Instantiates the SoftClip bijector.

  The bijector is implemented as a `Chain` of `Shift`, `Scale` and `Softplus`
  bijectors whose composition depends on which bounds are supplied. With no
  bounds the chain is empty and the Jacobian is flagged as constant.

  Args:
    low: Optional float `Tensor` lower bound. If `None`, the lower-bound
      constraint is omitted.
      Default value: `None`.
    high: Optional float `Tensor` upper bound. If `None`, the upper-bound
      constraint is omitted.
      Default value: `None`.
    hinge_softness: Optional nonzero float `Tensor`. Controls the softness
      of the constraint at the boundaries; values outside of the constraint
      set are mapped into intervals of width approximately
      `log(2) * hinge_softness` on the interior of each boundary. High
      softness reserves more space for values outside of the constraint set,
      leading to greater distortion of inputs *within* the constraint set,
      but improved numerical stability near the boundaries.
      Default value: `None` (`1.0`).
    validate_args: Python `bool` indicating whether arguments should be
      checked for correctness.
    name: Python `str` name given to ops managed by this object.
  """
  # Snapshot the constructor arguments before any other local is created.
  parameters = dict(locals())
  with tf.name_scope(name):
    dtype = dtype_util.common_dtype(
        [low, high, hinge_softness], dtype_hint=tf.float32)
    low = tensor_util.convert_nonref_to_tensor(low, name='low', dtype=dtype)
    high = tensor_util.convert_nonref_to_tensor(
        high, name='high', dtype=dtype)
    hinge_softness = tensor_util.convert_nonref_to_tensor(
        hinge_softness, name='hinge_softness', dtype=dtype)

    soft_hinge = softplus.Softplus(hinge_softness=hinge_softness)
    minus_one = tf.convert_to_tensor(-1., dtype=dtype)

    has_low = low is not None
    has_high = high is not None

    # In every branch the stages are listed outermost-first, i.e. the last
    # entry is the one that acts on the input first (this is what makes the
    # lists agree with the formulas quoted below).
    if has_low and has_high:
      # Two-sided clip. `low`/`high` may be reference tensors (e.g.
      # Variables), so anything derived from them is wrapped in a
      # `DeferredTensor` and only evaluated when needed.
      width = tfp_util.DeferredTensor(
          pretransformed_input=high,
          transform_fn=lambda hi: hi - low)
      negated_shrinkage_factor = tfp_util.DeferredTensor(
          pretransformed_input=width,
          transform_fn=lambda w: tf.cast(  # pylint: disable=g-long-lambda
              minus_one * w / soft_hinge.forward(w), dtype=dtype))

      # softclip(x) := -softplus(width - softplus(x - low)) *
      #                  (width) / (softplus(width)) + high
      stages = [
          shift.Shift(high),
          scale.Scale(negated_shrinkage_factor),
          soft_hinge,
          shift.Shift(width),
          scale.Scale(minus_one),
          soft_hinge,
          shift.Shift(tfp_util.DeferredTensor(low, lambda v: -v))
      ]
    elif has_low:
      # Soft lower bound only:
      #   softlower(x) := softplus(x - low) + low
      stages = [
          shift.Shift(low),
          soft_hinge,
          shift.Shift(tfp_util.DeferredTensor(low, lambda v: -v))
      ]
    elif has_high:
      # Soft upper bound only:
      #   softupper(x) := -softplus(high - x) + high
      stages = [
          shift.Shift(high),
          scale.Scale(minus_one),
          soft_hinge,
          scale.Scale(minus_one),
          shift.Shift(high)
      ]
    else:
      # Unconstrained: an empty chain.
      stages = []

    self._low = low
    self._high = high
    self._hinge_softness = hinge_softness
    self._chain = chain.Chain(stages, validate_args=validate_args)

    super(SoftClip, self).__init__(
        forward_min_event_ndims=0,
        dtype=dtype,
        validate_args=validate_args,
        parameters=parameters,
        is_constant_jacobian=not stages,
        name=name)