def _batch_shape_tensor(self):
   with tf.control_dependencies(self._runtime_assertions):
     return tf.broadcast_dynamic_shape(
         self._initial_distribution.batch_shape_tensor(),
         tf.broadcast_dynamic_shape(
             self._transition_distribution.batch_shape_tensor()[:-1],
             self._observation_distribution.batch_shape_tensor()[:-1]))
Example #2
 def _batch_shape_tensor(self):
   with self._name_scope("batch_shape_tensor"):
     return tf.broadcast_dynamic_shape(
         tf.broadcast_dynamic_shape(
             tf.shape(self.amplitude),
             tf.shape(self.length_scale)),
         tf.shape(self.period))
def _itemwise_error_rate(
    total_error_rate, param_tensors, sample_tensor=None, name=None):
  with tf.name_scope(name, "itemwise_error_rate",
                     [total_error_rate, param_tensors, sample_tensor]):
    result_shape = [1]
    for p_tensor in param_tensors:
      result_shape = tf.broadcast_dynamic_shape(
          tf.shape(p_tensor), result_shape)
    if sample_tensor is not None:
      result_shape = tf.broadcast_dynamic_shape(
          tf.shape(sample_tensor)[1:], result_shape)
    num_items = tf.reduce_prod(result_shape)
    return total_error_rate / tf.cast(num_items, dtype=total_error_rate.dtype)
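A quick standalone sketch of the broadcast-and-divide logic above (the shapes and the 0.06 rate are made up for illustration): with parameter shapes [2, 3] and [3] the broadcast result shape is [2, 3], so the total error rate is split across 6 items.

import tensorflow as tf

# Minimal sketch mirroring _itemwise_error_rate; values are illustrative only.
total_error_rate = tf.constant(0.06)
param_tensors = [tf.zeros([2, 3]), tf.zeros([3])]

result_shape = tf.constant([1])
for p_tensor in param_tensors:
  result_shape = tf.broadcast_dynamic_shape(tf.shape(p_tensor), result_shape)

num_items = tf.reduce_prod(result_shape)  # ==> 6
itemwise = total_error_rate / tf.cast(num_items, total_error_rate.dtype)  # ==> 0.01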
 def _validate_observation_data(self):
   # Check that observation index points and observation counts broadcast.
   assertions = []
   msg = ('Observation index point and observation counts are not '
          'broadcastable.')
   ndims = self.kernel.feature_ndims
   if (self.observation_index_points.shape[:-ndims].is_fully_defined() and
       self.observations.shape.is_fully_defined()):
     index_point_count = self.observation_index_points.shape[:-ndims]
     observation_count = self.observations.shape
     try:
       tf.broadcast_static_shape(index_point_count, observation_count)
     except ValueError:
       # Re-raise with our own more contextual error message.
       raise ValueError(msg[:-1] + ': {} and {}, respectively.'.format(
           index_point_count, observation_count))
   else:
     if self._validate_args:
       # Instead of an assertion of broadcastability, we simply append an op
       # to dynamically broadcast the two shapes; if this fails, the shapes
       # must not be broadcastable.
       broadcast_op = tf.broadcast_dynamic_shape(
           tf.shape(self.observation_index_points)[:-ndims],
           tf.shape(self.observations),
           name='check_that_index_points_and_observation_shapes_broadcast')
       assertions.append(broadcast_op)
   return assertions
Example #5
  def _apply_noisy_update(self, mom, grad, var):
    # Compute and apply the gradient update following
    # preconditioned Langevin dynamics
    stddev = tf.where(
        tf.squeeze(self._counter > self._burnin),
        tf.cast(tf.rsqrt(self._learning_rate), grad.dtype),
        tf.zeros([], grad.dtype))
    # Keep an exponentially weighted moving average of squared gradients.
    # Not thread safe
    decay_tensor = tf.cast(self._decay_tensor, grad.dtype)
    new_mom = decay_tensor * mom + (1. - decay_tensor) * tf.square(grad)
    preconditioner = tf.rsqrt(
        new_mom + tf.cast(self._diagonal_bias, grad.dtype))

    # Compute gradients of the preconditioner.
    _, preconditioner_grads = diag_jacobian(
        xs=var,
        ys=preconditioner,
        parallel_iterations=self._parallel_iterations)

    mean = 0.5 * (preconditioner * grad *
                  tf.cast(self._data_size, grad.dtype)
                  - preconditioner_grads[0])
    stddev *= tf.sqrt(preconditioner)
    result_shape = tf.broadcast_dynamic_shape(tf.shape(mean),
                                              tf.shape(stddev))
    with tf.control_dependencies([tf.assign(mom, new_mom)]):
      return tf.random_normal(shape=result_shape,
                              mean=mean,
                              stddev=stddev,
                              dtype=grad.dtype)
Example #6
 def _cdf(self, x):
   broadcast_shape = tf.broadcast_dynamic_shape(
       tf.shape(x), self.batch_shape_tensor())
   zeros = tf.zeros(broadcast_shape, dtype=self.dtype)
   ones = tf.ones(broadcast_shape, dtype=self.dtype)
   broadcasted_x = x * ones
   result_if_not_big = tf.where(
       x < self.low, zeros, (broadcasted_x - self.low) / self.range())
   return tf.where(x >= self.high, ones, result_if_not_big)
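The same piecewise logic as a standalone sketch (the `low`, `high`, and `x` values are made up): below `low` the CDF is 0, above `high` it is 1, and in between it is `(x - low) / (high - low)`.

import tensorflow as tf

# Standalone sketch of the uniform CDF above; low/high/x values are made up.
low = tf.constant(0.)
high = tf.constant(4.)
x = tf.constant([-1., 1., 5.])

broadcast_shape = tf.broadcast_dynamic_shape(tf.shape(x), tf.shape(low))
zeros = tf.zeros(broadcast_shape)
ones = tf.ones(broadcast_shape)

cdf = tf.where(x >= high, ones,
               tf.where(x < low, zeros, (x - low) / (high - low)))
# ==> [0., 0.25, 1.]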
 def check(t):
   samples_batch_shape = tf.shape(samples)[1:]
   broadcasted_batch_shape = tf.broadcast_dynamic_shape(
       samples_batch_shape, tf.shape(t))
   # This rank check ensures that I don't get a wrong answer from the
   # _shapes_ broadcasting against each other.
   samples_batch_ndims = tf.size(samples_batch_shape)
   ge = tf.assert_greater_equal(samples_batch_ndims, tf.rank(t))
   eq = tf.assert_equal(samples_batch_shape, broadcasted_batch_shape)
   return ge, eq
  def batch_shape_tensor(self):
    """Runtime batch shape of models represented by this component.

    Returns:
      batch_shape: `int` `Tensor` giving the broadcast batch shape of
        all model parameters. This should match the batch shape of
        derived state space models, i.e.,
        `self.make_state_space_model(...).batch_shape_tensor()`.
    """
    batch_shape = tf.constant([], dtype=tf.int32)
    for param in self.parameters:
      batch_shape = tf.broadcast_dynamic_shape(
          batch_shape, param.prior.batch_shape_tensor())
    return batch_shape
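A minimal sketch of the same fold (the per-parameter batch shapes below are made up): starting from an empty shape and repeatedly broadcasting accumulates the joint batch shape of all parameters.

import tensorflow as tf

# Made-up per-parameter batch shapes; the loop mirrors the method above.
prior_batch_shapes = [tf.constant([], dtype=tf.int32),
                      tf.constant([3, 1], dtype=tf.int32),
                      tf.constant([2], dtype=tf.int32)]

batch_shape = tf.constant([], dtype=tf.int32)
for shape in prior_batch_shapes:
  batch_shape = tf.broadcast_dynamic_shape(batch_shape, shape)
# ==> [3, 2]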
def determine_batch_event_shapes(grid, endpoint_affine):
  """Helper to infer batch_shape and event_shape."""
  with tf.name_scope(name="determine_batch_event_shapes"):
    # grid  # shape: [B, k, q]
    # endpoint_affine     # len=k, shape: [B, d, d]
    batch_shape = grid.shape[:-2]
    batch_shape_tensor = tf.shape(grid)[:-2]
    event_shape = None
    event_shape_tensor = None

    def _set_event_shape(shape, shape_tensor):
      if event_shape is None:
        return shape, shape_tensor
      return (tf.broadcast_static_shape(event_shape, shape),
              tf.broadcast_dynamic_shape(event_shape_tensor, shape_tensor))

    for aff in endpoint_affine:
      if aff.shift is not None:
        batch_shape = tf.broadcast_static_shape(batch_shape,
                                                aff.shift.shape[:-1])
        batch_shape_tensor = tf.broadcast_dynamic_shape(
            batch_shape_tensor,
            tf.shape(aff.shift)[:-1])
        event_shape, event_shape_tensor = _set_event_shape(
            aff.shift.shape[-1:],
            tf.shape(aff.shift)[-1:])

      if aff.scale is not None:
        batch_shape = tf.broadcast_static_shape(batch_shape,
                                                aff.scale.batch_shape)
        batch_shape_tensor = tf.broadcast_dynamic_shape(
            batch_shape_tensor, aff.scale.batch_shape_tensor())
        event_shape, event_shape_tensor = _set_event_shape(
            tf.TensorShape([aff.scale.range_dimension]),
            aff.scale.range_dimension_tensor()[tf.newaxis])

    return batch_shape, batch_shape_tensor, event_shape, event_shape_tensor
  def _log_prob(self, value):
    with tf.control_dependencies(self._runtime_assertions):
      # The argument `value` is a tensor of sequences of observations.
      # `observation_batch_shape` is the shape of that tensor with the
      # sequence part removed.
      # `observation_batch_shape` is then broadcast to the full batch shape
      # to give the `working_shape` that defines the shape of the result.

      observation_batch_shape = tf.shape(
          value)[:-1 - self._underlying_event_rank]
      # value :: observation_batch_shape num_steps observation_event_shape
      working_shape = tf.broadcast_dynamic_shape(observation_batch_shape,
                                                 self.batch_shape_tensor())
      log_init = tf.broadcast_to(self._log_init,
                                 tf.concat([working_shape,
                                            [self._num_states]], axis=0))
      # log_init :: working_shape num_states
      log_transition = self._log_trans

      # `observation_event_shape` is the shape of each sequence of observations
      # emitted by the model.
      observation_event_shape = tf.shape(
          value)[-1 - self._underlying_event_rank:]
      working_obs = tf.broadcast_to(value,
                                    tf.concat([working_shape,
                                               observation_event_shape],
                                              axis=0))
      # working_obs :: working_shape observation_event_shape
      r = self._underlying_event_rank

      # Move index into sequence of observations to front so we can apply
      # tf.foldl
      working_obs = util.move_dimension(working_obs,
                                        -1 - r, 0)[..., tf.newaxis]
      # working_obs :: num_steps working_shape underlying_event_shape
      observation_probs = (
          self._observation_distribution.log_prob(working_obs))

      def forward_step(log_prev_step, log_observation):
        return _log_vector_matrix(log_prev_step,
                                  log_transition) + log_observation

      fwd_prob = tf.foldl(forward_step, observation_probs, initializer=log_init)
      # fwd_prob :: working_shape num_states

      log_prob = tf.reduce_logsumexp(fwd_prob, axis=-1)
      # log_prob :: working_shape

      return log_prob
Example #11
def broadcast_batch_shape(distributions):
  """Get broadcast batch shape from distributions, statically if possible."""

  # Static case
  batch_shape = distributions[0].batch_shape
  for distribution in distributions:
    batch_shape = tf.broadcast_static_shape(batch_shape,
                                            distribution.batch_shape)
  if batch_shape.is_fully_defined():
    return batch_shape.as_list()

  # Fallback on dynamic.
  batch_shape = distributions[0].batch_shape_tensor()
  for distribution in distributions:
    batch_shape = tf.broadcast_dynamic_shape(batch_shape,
                                             distribution.batch_shape_tensor())

  return tf.convert_to_tensor(batch_shape)
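A hedged usage sketch (assumes `tensorflow_probability` is importable as `tfp`; the parameter values are made up): distributions with batch shapes [3, 1] and [2] broadcast to a joint batch shape [3, 2], returned as a Python list because both shapes are static.

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

# Batch shapes [3, 1] and [2] broadcast to [3, 2]; parameter values are made up.
normal = tfd.Normal(loc=tf.zeros([3, 1]), scale=1.)
gamma = tfd.Gamma(concentration=tf.ones([2]), rate=1.)

print(broadcast_batch_shape([normal, gamma]))  # ==> [3, 2]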
Example #12
def random_rayleigh(shape, scale=None, dtype=tf.float32, seed=None, name=None):
  """Generates `Tensor` of positive reals drawn from a Rayleigh distributions.

  The probability density function of a Rayleigh distribution with `scale`
  parameter is given by:

  ```none
  f(x) = x scale**-2 exp(-x**2 0.5 scale**-2)
  ```

  For more details, see [Rayleigh distribution](
  https://en.wikipedia.org/wiki/Rayleigh_distribution)

  Args:
    shape: Vector-shaped, `int` `Tensor` representing shape of output.
    scale: (Optional) Positive `float` `Tensor` representing `Rayleigh` scale.
      Default value: `None` (i.e., `scale = 1.`).
    dtype: (Optional) TF `dtype` representing `dtype` of output.
      Default value: `tf.float32`.
    seed: (Optional) Python integer to seed the random number generator.
      Default value: `None` (i.e., no seed).
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., 'random_rayleigh').

  Returns:
    rayleigh: `Tensor` with specified `shape` and `dtype` consisting of positive
      real values drawn from a Rayleigh distribution with specified `scale`.
  """
  with tf.name_scope(name, 'random_rayleigh', [shape, scale, seed]):
    if scale is not None:
      # It's important to expand the shape to match scale's, otherwise we won't
      # have independent draws.
      scale = tf.convert_to_tensor(scale, dtype=dtype, name='scale')
      shape = tf.broadcast_dynamic_shape(shape, tf.shape(scale))
    x = tf.sqrt(-2. * tf.log(tf.random_uniform(
        shape,
        minval=0,
        maxval=1,
        dtype=dtype,
        seed=seed)))
    if scale is None:
      return x
    return x * scale
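For reference, the draw above is inverse-CDF sampling: a Rayleigh distribution with scale `sigma` has CDF `F(x) = 1 - exp(-x**2 / (2 sigma**2))`, so `x = sigma * sqrt(-2 * log(U))` with `U` uniform on (0, 1) is a valid sample. A minimal usage sketch (TF 1.x API as in the snippet; the shape, scale, and seed values are made up):

import tensorflow as tf

# Usage sketch; the arguments are illustrative only.
samples = random_rayleigh(shape=[5], scale=2., dtype=tf.float32, seed=42)
# `samples` has shape [5]; each entry is scale * sqrt(-2 * log(U)).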
Example #13
def get_broadcast_shape(*tensors):
  """Get broadcast shape as a Python list of integers (preferred) or `Tensor`.

  Args:
    *tensors:  One or more `Tensor` objects (already converted!).

  Returns:
    broadcast shape:  Python list (if shapes determined statically), otherwise
      an `int32` `Tensor`.
  """
  # Try static.
  s_shape = tensors[0].shape
  for t in tensors[1:]:
    s_shape = tf.broadcast_static_shape(s_shape, t.shape)
  if s_shape.is_fully_defined():
    return s_shape.as_list()

  # Fallback on dynamic.
  d_shape = tf.shape(tensors[0])
  for t in tensors[1:]:
    d_shape = tf.broadcast_dynamic_shape(d_shape, tf.shape(t))
  return d_shape
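A usage sketch (the tensor shapes are made up): fully defined shapes come back as a Python list; if any shape were only partially known, the dynamic fallback would return an int32 shape `Tensor` instead.

import tensorflow as tf

# Shapes are illustrative only.
a = tf.zeros([2, 1, 3])
b = tf.zeros([5, 1])

print(get_broadcast_shape(a, b))  # ==> [2, 5, 3] (Python list, since shapes are static)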
Example #14
def iou(box_1, box_2):
    # box_1 is the pred box and contains multiple class prob values, one for each class
    # box_2 is the true box and contains just one class value
    box_1 = tf.expand_dims(box_1, -2)
    box_2 = tf.expand_dims(box_2, 0)

    new_shape = tf.broadcast_dynamic_shape(tf.shape(box_1), tf.shape(box_2))
    box_1 = tf.broadcast_to(box_1, new_shape)
    box_2 = tf.broadcast_to(box_2, new_shape)

    intersection_w = tf.maximum(
        tf.minimum(box_1[..., 2], box_2[..., 2]) -
        tf.maximum(box_1[..., 0], box_2[..., 0]), 0)
    intersection_h = tf.maximum(
        tf.minimum(box_1[..., 3], box_2[..., 3]) -
        tf.maximum(box_1[..., 1], box_2[..., 1]), 0)
    intersection_area = intersection_w * intersection_h

    box_1_area = (box_1[..., 2] - box_1[..., 0]) * (box_1[..., 3] -
                                                    box_1[..., 1])
    box_2_area = (box_2[..., 2] - box_2[..., 0]) * (box_2[..., 3] -
                                                    box_2[..., 1])

    return intersection_area / (box_1_area + box_2_area - intersection_area)
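A usage sketch with two made-up boxes in `(x1, y1, x2, y2)` format (the trailing class/objectness entries mentioned in the comments are omitted, since only the first four coordinates are used): the boxes below overlap in a unit square, giving IoU 1/7.

import tensorflow as tf

# Illustrative boxes only: intersection area 1, union area 4 + 4 - 1 = 7.
box_1 = tf.constant([[0., 0., 2., 2.]])  # one predicted box
box_2 = tf.constant([[1., 1., 3., 3.]])  # one true box

print(iou(box_1, box_2))  # ==> [[0.142857]]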
Example #15
def broadcast_iou(box_1, box_2):
    # box_1: (..., (x1, y1, x2, y2))
    # box_2: (N, (x1, y1, x2, y2))

    # broadcast boxes
    box_1 = tf.expand_dims(box_1, -2)
    box_2 = tf.expand_dims(box_2, 0)
    # new_shape: (..., N, (x1, y1, x2, y2))
    new_shape = tf.broadcast_dynamic_shape(tf.shape(box_1), tf.shape(box_2))
    box_1 = tf.broadcast_to(box_1, new_shape)
    box_2 = tf.broadcast_to(box_2, new_shape)

    int_w = tf.maximum(
        tf.minimum(box_1[..., 2], box_2[..., 2]) -
        tf.maximum(box_1[..., 0], box_2[..., 0]), 0)
    int_h = tf.maximum(
        tf.minimum(box_1[..., 3], box_2[..., 3]) -
        tf.maximum(box_1[..., 1], box_2[..., 1]), 0)
    int_area = int_w * int_h
    box_1_area = (box_1[..., 2] - box_1[..., 0]) * \
        (box_1[..., 3] - box_1[..., 1])
    box_2_area = (box_2[..., 2] - box_2[..., 0]) * \
        (box_2[..., 3] - box_2[..., 1])
    return int_area / (box_1_area + box_2_area - int_area)
Example #16
 def _batch_shape_tensor(self):
   return tf.broadcast_dynamic_shape(
       tf.shape(self.df), self.scale_operator.batch_shape_tensor())
Example #17
 def _batch_shape_tensor(self):
     return tf.broadcast_dynamic_shape(
         tf.shape(input=self.peak),
         tf.broadcast_dynamic_shape(tf.shape(input=self.low),
                                    tf.shape(input=self.high)))
 def _set_event_shape(shape, shape_tensor):
   if event_shape is None:
     return shape, shape_tensor
   return (tf.broadcast_static_shape(event_shape, shape),
           tf.broadcast_dynamic_shape(event_shape_tensor, shape_tensor))
Example #19
 def _batch_shape_tensor(self):
   return tf.broadcast_dynamic_shape(
       tf.shape(input=self.total_count), tf.shape(input=self.probs))
Example #20
 def _batch_shape_tensor(self, low=None, high=None):
     return tf.broadcast_dynamic_shape(
         tf.shape(self.low if low is None else low),
         tf.shape(self.high if high is None else high),
     )
Example #21
    def __init__(self,
                 mean,
                 stddev=None,
                 logstd=None,
                 group_event_ndims=None,
                 check_numerics=False,
                 name=None,
                 scope=None):
        # check the arguments
        if (stddev is None and logstd is None) or \
                (stddev is not None and logstd is not None):
            raise ValueError('One and only one of `stddev`, `logstd` should '
                             'be specified.')
        dtype = get_preferred_tensor_dtype(mean)
        if not dtype.is_floating:
            raise TypeError('Normal distribution parameters must be float '
                            'numbers.')

        super(Normal, self).__init__(
            group_event_ndims=group_event_ndims,
            check_numerics=check_numerics,
            name=name,
            scope=scope,
        )

        with reopen_variable_scope(self.variable_scope):
            with tf.name_scope('init'):
                # obtain parameter tensors
                mean = tf.convert_to_tensor(mean, dtype=dtype)
                if stddev is not None:
                    stddev = tf.convert_to_tensor(stddev, dtype=dtype)
                    self._stdx = stddev
                    self._stdx_is_log = False
                else:
                    logstd = tf.convert_to_tensor(logstd, dtype=dtype)
                    self._stdx = logstd
                    self._stdx_is_log = True

                # check the shape and data types of parameters
                self._mean = mean
                try:
                    self._static_batch_shape = tf.broadcast_static_shape(
                        self._mean.get_shape(), self._stdx.get_shape())
                except ValueError:
                    raise ValueError(
                        '`mean` and `stddev`/`logstd` should be '
                        'broadcastable to match each other (%r vs %r).' %
                        (self._mean.get_shape(), self._stdx.get_shape()))
                self._dynamic_batch_shape = tf.broadcast_dynamic_shape(
                    tf.shape(self._mean), tf.shape(self._stdx))

                # derive the attributes of this Normal distribution
                if self._stdx_is_log:
                    self._stddev = self._check_numerics(
                        tf.exp(self._stdx, name='stddev'), 'stddev')
                    self._logstd = self._stdx
                    self._var = self._check_numerics(
                        tf.exp(tf.constant(2., dtype=dtype) * self._logstd,
                               name='variance'), 'variance')
                    self._precision = self._check_numerics(
                        tf.exp(tf.constant(-2., dtype=dtype) * self._logstd,
                               name='precision'), 'precision')
                else:
                    self._stddev = self._stdx
                    self._logstd = self._check_numerics(
                        tf.log(self._stdx, name='logstd'), 'logstd')
                    self._var = tf.square(self._stddev, name='variance')
                    self._precision = self._check_numerics(
                        tf.divide(tf.constant(1., dtype=dtype),
                                  self._var,
                                  name='precision'), 'precision')
                self._logvar = tf.multiply(tf.constant(2., dtype=dtype),
                                           self._logstd,
                                           name='logvar')
                self._log_prec = tf.negative(self._logvar,
                                             name='log_precision')
Example #22
    def __init__(self,
                 mean,
                 log_scale,
                 bin_size,
                 min_val=None,
                 max_val=None,
                 dtype=tf.float32,
                 biased_edges=True,
                 discretize_given=True,
                 discretize_sample=True,
                 epsilon=1e-7):
        """
        Construct a new :class:`DiscretizedLogistic`.

        Args:
            mean: A Tensor, the `mean`.
            log_scale: A Tensor, the `log(scale)`.
            bin_size: A scalar, the `bin_size`.
            min_val: A scalar, the minimum possible value of `x`.
            max_val: A scalar, the maximum possible value of `x`.
            dtype: The data type of `x`.
            biased_edges: Whether or not to use bias density for edge values?
                See above.
            discretize_given (bool): Whether or not to discretize `given`
                in :meth:`log_prob` and :meth:`prob`?
            discretize_sample (bool): Whether or not to discretize the
                generated samples in :meth:`sample`?
            epsilon: Small float to avoid dividing by zero or taking
                logarithm of zero.
        """
        # check the arguments
        mean = tf.convert_to_tensor(mean)
        param_dtype = mean.dtype
        log_scale = tf.convert_to_tensor(log_scale)
        dtype = tf.as_dtype(dtype)

        if not is_integer_number(bin_size) and not dtype.is_floating:
            raise ValueError(
                '`bin_size` is a float number, but `dtype` is not a float '
                'number type: {}'.format(dtype))

        if (min_val is None and max_val is not None) or \
                (min_val is not None and max_val is None):
            raise ValueError('`min_val` and `max_val` must either both be '
                             'None or both be specified.')

        if max_val is not None and min_val is not None and \
                not is_integer_number((max_val - min_val) / bin_size):
            raise ValueError(
                '`max_val - min_val` must be a multiple of `bin_size`: '
                'max_val - min_val = {} vs bin_size = {}'.format(
                    max_val - min_val, bin_size))

        # infer the batch shape
        try:
            batch_static_shape = tf.broadcast_static_shape(
                mean.get_shape(), log_scale.get_shape())
        except ValueError:
            raise ValueError('The shape of `mean` and `log_scale` cannot '
                             'be broadcasted: mean {} vs log_scale {}'.format(
                                 mean, log_scale))

        with tf.name_scope('DiscretizedLogistic.init'):
            batch_shape = tf.broadcast_dynamic_shape(tf.shape(mean),
                                                     tf.shape(log_scale))

        # memorize the arguments and call parent constructor
        bin_size = convert_to_tensor_and_cast(bin_size, param_dtype)
        if min_val is not None:
            min_val = convert_to_tensor_and_cast(min_val, param_dtype)
        if max_val is not None:
            max_val = convert_to_tensor_and_cast(max_val, param_dtype)

        self._mean = mean
        self._log_scale = log_scale
        self._param_dtype = param_dtype
        self._bin_size = bin_size
        self._min_val = min_val
        self._max_val = max_val
        self._biased_edges = bool(biased_edges)
        self._discretize_given = bool(discretize_given)
        self._discretize_sample = bool(discretize_sample)
        self._epsilon = epsilon

        super(DiscretizedLogistic,
              self).__init__(dtype=dtype,
                             is_continuous=not self._discretize_sample,
                             is_reparameterized=not self._discretize_sample,
                             batch_shape=batch_shape,
                             batch_static_shape=batch_static_shape,
                             value_ndims=0)
Example #23
 def _batch_shape_tensor(self):
   return tf.broadcast_dynamic_shape(
       tf.shape(self.mean_direction)[:-1],
       tf.shape(self.concentration))
Example #24
def batch_interp_regular_nd_grid(x,
                                 x_ref_min,
                                 x_ref_max,
                                 y_ref,
                                 axis,
                                 fill_value='constant_extension',
                                 name=None):
    """Multi-linear interpolation on a regular (constant spacing) grid.

  Given [a batch of] reference values, this function computes a multi-linear
  interpolant and evaluates it on [a batch of] of new `x` values.

  The interpolant is built from reference values indexed by `nd` dimensions
  of `y_ref`, starting at `axis`.

  For example, take the case of a `2-D` scalar valued function and no leading
  batch dimensions.  In this case, `y_ref.shape = [C1, C2]` and `y_ref[i, j]`
  is the reference value corresponding to grid point

  ```
  [x_ref_min[0] + i * (x_ref_max[0] - x_ref_min[0]) / (C1 - 1),
   x_ref_min[1] + j * (x_ref_max[1] - x_ref_min[1]) / (C2 - 1)]
  ```

  In the general case, dimensions to the left of `axis` in `y_ref` are broadcast
  with leading dimensions in `x`, `x_ref_min`, `x_ref_max`.

  Args:
    x: Numeric `Tensor` The x-coordinates of the interpolated output values for
      each batch.  Shape `[..., D, nd]`, designating [a batch of] `D`
      coordinates in `nd` space.  `D` must be `>= 1` and is not a batch dim.
    x_ref_min:  `Tensor` of same `dtype` as `x`.  The minimum values of the
      (implicitly defined) reference `x_ref`.  Shape `[..., nd]`.
    x_ref_max:  `Tensor` of same `dtype` as `x`.  The maximum values of the
      (implicitly defined) reference `x_ref`.  Shape `[..., nd]`.
    y_ref:  `Tensor` of same `dtype` as `x`.  The reference output values. Shape
      `[..., C1, ..., Cnd, B1,...,BM]`, designating [a batch of] reference
      values indexed by `nd` dimensions, of a shape `[B1,...,BM]` valued
      function (for `M >= 0`).
    axis:  Scalar integer `Tensor`.  Dimensions `[axis, axis + nd)` of `y_ref`
      index the interpolation table.  E.g. `3-D` interpolation of a scalar
      valued function requires `axis=-3` and a `3-D` matrix valued function
      requires `axis=-5`.
    fill_value:  Determines what values output should take for `x` values that
      are below `x_ref_min` or above `x_ref_max`. Scalar `Tensor` or
      "constant_extension" ==> Extend as constant function.
      Default value: `"constant_extension"`
    name:  A name to prepend to created ops.
      Default value: `"batch_interp_regular_nd_grid"`.

  Returns:
    y_interp:  Interpolation between members of `y_ref`, at points `x`.
      `Tensor` of same `dtype` as `x`, and shape `[..., D, B1, ..., BM].`

  Raises:
    ValueError:  If `rank(x) < 2` is determined statically.
    ValueError:  If `axis` is not a scalar is determined statically.
    ValueError:  If `axis + nd > rank(y_ref)` is determined statically.

  #### Examples

  Interpolate a function of one variable.

  ```python
  y_ref = tf.exp(tf.linspace(start=0., stop=10., num=20))

  tfp.math.batch_interp_regular_nd_grid(
      # x.shape = [3, 1], x_ref_min/max.shape = [1].  Trailing `1` for `1-D`.
      x=[[6.0], [0.5], [3.3]], x_ref_min=[0.], x_ref_max=[10.], y_ref=y_ref)
  ==> approx [exp(6.0), exp(0.5), exp(3.3)]
  ```

  Interpolate a scalar function of two variables.

  ```python
  x_ref_min = [0., 0.]
  x_ref_max = [2 * np.pi, 2 * np.pi]

  # Build y_ref.
  x0s, x1s = tf.meshgrid(
      tf.linspace(x_ref_min[0], x_ref_max[0], num=100),
      tf.linspace(x_ref_min[1], x_ref_max[1], num=100),
      indexing='ij')

  def func(x0, x1):
    return tf.sin(x0) * tf.cos(x1)

  y_ref = func(x0s, x1s)

  x = np.pi * tf.random_uniform(shape=(10, 2))

  tfp.math.batch_interp_regular_nd_grid(x, x_ref_min, x_ref_max, y_ref, axis=-2)
  ==> tf.sin(x[:, 0]) * tf.cos(x[:, 1])
  ```

  """
    with tf.compat.v1.name_scope(
            name,
            default_name='interp_regular_nd_grid',
            values=[x, x_ref_min, x_ref_max, y_ref, fill_value]):
        dtype = dtype_util.common_dtype([x, x_ref_min, x_ref_max, y_ref],
                                        preferred_dtype=tf.float32)

        # Arg checking.
        if isinstance(fill_value, str):
            if fill_value != 'constant_extension':
                raise ValueError(
                    'A fill value ({}) was not an allowed string ({})'.format(
                        fill_value, 'constant_extension'))
        else:
            fill_value = tf.convert_to_tensor(value=fill_value,
                                              name='fill_value',
                                              dtype=dtype)
            _assert_ndims_statically(fill_value, expect_ndims=0)

        # x.shape = [..., nd].
        x = tf.convert_to_tensor(value=x, name='x', dtype=dtype)
        _assert_ndims_statically(x, expect_ndims_at_least=2)

        # y_ref.shape = [..., C1,...,Cnd, B1,...,BM]
        y_ref = tf.convert_to_tensor(value=y_ref, name='y_ref', dtype=dtype)

        # x_ref_min.shape = [nd]
        x_ref_min = tf.convert_to_tensor(value=x_ref_min,
                                         name='x_ref_min',
                                         dtype=dtype)
        x_ref_max = tf.convert_to_tensor(value=x_ref_max,
                                         name='x_ref_max',
                                         dtype=dtype)
        _assert_ndims_statically(x_ref_min,
                                 expect_ndims_at_least=1,
                                 expect_static=True)
        _assert_ndims_statically(x_ref_max,
                                 expect_ndims_at_least=1,
                                 expect_static=True)

        # nd is the number of dimensions indexing the interpolation table, it's the
        # "nd" in the function name.
        nd = tf.compat.dimension_value(x_ref_min.shape[-1])
        if nd is None:
            raise ValueError('`x_ref_min.shape[-1]` must be known statically.')
        x_ref_max.shape[-1:].assert_is_compatible_with(x_ref_min.shape[-1:])

        # Convert axis and check it statically.
        axis = tf.convert_to_tensor(value=axis, dtype=tf.int32, name='axis')
        axis = distribution_util.make_non_negative_axis(axis, tf.rank(y_ref))
        axis.shape.assert_has_rank(0)
        axis_ = tf.get_static_value(axis)
        y_ref_rank_ = tf.get_static_value(tf.rank(y_ref))
        if axis_ is not None and y_ref_rank_ is not None:
            if axis_ + nd > y_ref_rank_:
                raise ValueError(
                    'Since dims `[axis, axis + nd)` index the interpolation table, we '
                    'must have `axis + nd <= rank(y_ref)`.  Found: '
                    '`axis`: {},  rank(y_ref): {}, and inferred `nd` from trailing '
                    'dimensions of `x_ref_min` to be {}.'.format(
                        axis_, y_ref_rank_, nd))

        x_batch_shape = tf.shape(input=x)[:-2]
        x_ref_min_batch_shape = tf.shape(input=x_ref_min)[:-1]
        x_ref_max_batch_shape = tf.shape(input=x_ref_max)[:-1]
        y_ref_batch_shape = tf.shape(input=y_ref)[:axis]

        # Do a brute-force broadcast of batch dims (add zeros).
        batch_shape = y_ref_batch_shape
        for tensor in [
                x_batch_shape, x_ref_min_batch_shape, x_ref_max_batch_shape
        ]:
            batch_shape = tf.broadcast_dynamic_shape(batch_shape, tensor)

        def _batch_of_zeros_with_rightmost_singletons(n_singletons):
            """Return Tensor of zeros with some singletons on the rightmost dims."""
            ones = tf.ones(shape=[n_singletons], dtype=tf.int32)
            return tf.zeros(shape=tf.concat([batch_shape, ones], axis=0),
                            dtype=dtype)

        x += _batch_of_zeros_with_rightmost_singletons(n_singletons=2)
        x_ref_min += _batch_of_zeros_with_rightmost_singletons(n_singletons=1)
        x_ref_max += _batch_of_zeros_with_rightmost_singletons(n_singletons=1)
        y_ref += _batch_of_zeros_with_rightmost_singletons(
            n_singletons=tf.rank(y_ref) - axis)

        return _batch_interp_with_gather_nd(
            x=x,
            x_ref_min=x_ref_min,
            x_ref_max=x_ref_max,
            y_ref=y_ref,
            nd=nd,
            fill_value=fill_value,
            batch_dims=tf.get_static_value(tf.rank(x)) - 2)
Example #25
 def _batch_shape(self):
     return tf.broadcast_dynamic_shape(tf.shape(self.minval),
                                       tf.shape(self.maxval))
Example #26
 def _batch_shape_tensor(self):
     return tf.broadcast_dynamic_shape(
         tf.shape(self.mean_direction)[:-1], tf.shape(self.concentration))
Example #27
 def _batch_shape_tensor(self):
   return tf.broadcast_dynamic_shape(
       [] if self.amplitude is None else tf.shape(input=self.amplitude),
       [] if self.length_scale is None else tf.shape(input=self.length_scale))
Example #28
 def _batch_shape_tensor(self):
     return tf.broadcast_dynamic_shape(
         self.distribution.batch_shape_tensor(),
         tf.shape(self.mixture_distribution.logits))[:-1]
Example #29
 def _batch_shape_tensor(self):
     return tf.broadcast_dynamic_shape(
          tf.shape(self._loc)[:-1], self._radius_dist.batch_shape_tensor())
Example #30
 def _batch_shape_tensor(self):
   return tf.broadcast_dynamic_shape(
       tf.shape(self.low),
       tf.shape(self.high))
 def _batch_shape_tensor(self):
   return tf.broadcast_dynamic_shape(
       [] if self.amplitude is None else tf.shape(self.amplitude),
       [] if self.length_scale is None else tf.shape(self.length_scale))
def sample_annealed_importance_chain(
    num_steps,
    proposal_log_prob_fn,
    target_log_prob_fn,
    current_state,
    make_kernel_fn,
    parallel_iterations=10,
    name=None):
  """Runs annealed importance sampling (AIS) to estimate normalizing constants.

  This function uses Hamiltonian Monte Carlo to sample from a series of
  distributions that slowly interpolates between an initial "proposal"
  distribution:

  `exp(proposal_log_prob_fn(x) - proposal_log_normalizer)`

  and the target distribution:

  `exp(target_log_prob_fn(x) - target_log_normalizer)`,

  accumulating importance weights along the way. The product of these
  importance weights gives an unbiased estimate of the ratio of the
  normalizing constants of the initial distribution and the target
  distribution:

  `E[exp(ais_weights)] = exp(target_log_normalizer - proposal_log_normalizer)`.

  Note: `proposal_log_prob_fn` and `target_log_prob_fn` are called exactly three
  times (although this may be reduced to two times, in the future).

  Args:
    num_steps: Integer number of Markov chain updates to run. More
      iterations means more expense, but smoother annealing between q
      and p, which in turn means exponentially lower variance for the
      normalizing constant estimator.
    proposal_log_prob_fn: Python callable that returns the log density of the
      initial distribution.
    target_log_prob_fn: Python callable which takes an argument like
      `current_state` (or `*current_state` if it's a list) and returns its
      (possibly unnormalized) log-density under the target distribution.
    current_state: `Tensor` or Python `list` of `Tensor`s representing the
      current state(s) of the Markov chain(s). The first `r` dimensions index
      independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`.
    make_kernel_fn: Python `callable` which returns a `TransitionKernel`-like
      object. Must take one argument representing the `TransitionKernel`'s
      `target_log_prob_fn`. The `target_log_prob_fn` argument represents the
      `TransitionKernel`'s target log distribution.  Note:
      `sample_annealed_importance_chain` creates a new `target_log_prob_fn`
      which is an interpolation between the supplied `target_log_prob_fn` and
      `proposal_log_prob_fn`; it is this interpolated function which is used as
      an argument to `make_kernel_fn`.
    parallel_iterations: The number of iterations allowed to run in parallel.
        It must be a positive integer. See `tf.while_loop` for more details.
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., "sample_annealed_importance_chain").

  Returns:
    next_state: `Tensor` or Python list of `Tensor`s representing the
      state(s) of the Markov chain(s) at the final iteration. Has same shape as
      input `current_state`.
    ais_weights: Tensor with the estimated weight(s). Has shape matching
      `target_log_prob_fn(current_state)`.
    kernel_results: `collections.namedtuple` of internal calculations used to
      advance the chain.

  #### Examples

  ##### Estimate the normalizing constant of a log-gamma distribution.

  ```python
  tfd = tfp.distributions

  # Run 100 AIS chains in parallel
  num_chains = 100
  dims = 20
  dtype = np.float32

  proposal = tfd.MultivariateNormalDiag(
     loc=tf.zeros([dims], dtype=dtype))

  target = tfd.TransformedDistribution(
    distribution=tfd.Gamma(concentration=dtype(2),
                           rate=dtype(3)),
    bijector=tfp.bijectors.Invert(tfp.bijectors.Exp()),
    event_shape=[dims])

  chains_state, ais_weights, kernels_results = (
      tfp.mcmc.sample_annealed_importance_chain(
          num_steps=1000,
          proposal_log_prob_fn=proposal.log_prob,
          target_log_prob_fn=target.log_prob,
          current_state=proposal.sample(num_chains),
          make_kernel_fn=lambda tlp_fn: tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=tlp_fn,
            step_size=0.2,
            num_leapfrog_steps=2)))

  log_estimated_normalizer = (tf.reduce_logsumexp(ais_weights)
                              - np.log(num_chains))
  log_true_normalizer = tf.lgamma(2.) - 2. * tf.log(3.)
  ```

  ##### Estimate marginal likelihood of a Bayesian regression model.

  ```python
  tfd = tfp.distributions

  def make_prior(dims, dtype):
    return tfd.MultivariateNormalDiag(
        loc=tf.zeros(dims, dtype))

  def make_likelihood(weights, x):
    return tfd.MultivariateNormalDiag(
        loc=tf.tensordot(weights, x, axes=[[0], [-1]]))

  # Run 100 AIS chains in parallel
  num_chains = 100
  dims = 10
  dtype = np.float32

  # Make training data.
  x = np.random.randn(num_chains, dims).astype(dtype)
  true_weights = np.random.randn(dims).astype(dtype)
  y = np.dot(x, true_weights) + np.random.randn(num_chains)

  # Setup model.
  prior = make_prior(dims, dtype)
  def target_log_prob_fn(weights):
    return prior.log_prob(weights) + make_likelihood(weights, x).log_prob(y)

  proposal = tfd.MultivariateNormalDiag(
      loc=tf.zeros(dims, dtype))

  weight_samples, ais_weights, kernel_results = (
      tfp.mcmc.sample_annealed_importance_chain(
        num_steps=1000,
        proposal_log_prob_fn=proposal.log_prob,
        target_log_prob_fn=target_log_prob_fn,
        current_state=tf.zeros([num_chains, dims], dtype),
        make_kernel_fn=lambda tlp_fn: tfp.mcmc.HamiltonianMonteCarlo(
          target_log_prob_fn=tlp_fn,
          step_size=0.1,
          num_leapfrog_steps=2)))
  log_normalizer_estimate = (tf.reduce_logsumexp(ais_weights)
                             - np.log(num_chains))
  ```

  """
  with tf.name_scope(
      name, "sample_annealed_importance_chain",
      [num_steps, current_state]):
    num_steps = tf.convert_to_tensor(
        num_steps,
        dtype=tf.int32,
        name="num_steps")
    if mcmc_util.is_list_like(current_state):
      current_state = [tf.convert_to_tensor(s, name="current_state")
                       for s in current_state]
    else:
      current_state = tf.convert_to_tensor(
          current_state, name="current_state")

    def _make_convex_combined_log_prob_fn(iter_):
      def _fn(*args):
        p = tf.identity(proposal_log_prob_fn(*args), name="proposal_log_prob")
        t = tf.identity(target_log_prob_fn(*args), name="target_log_prob")
        dtype = p.dtype.base_dtype
        beta = tf.cast(iter_ + 1, dtype) / tf.cast(num_steps, dtype)
        return tf.identity(beta * t + (1. - beta) * p,
                           name="convex_combined_log_prob")
      return _fn

    def _loop_body(iter_, ais_weights, current_state, kernel_results):
      """Closure which implements `tf.while_loop` body."""
      x = (current_state if mcmc_util.is_list_like(current_state)
           else [current_state])
      proposal_log_prob = proposal_log_prob_fn(*x)
      target_log_prob = target_log_prob_fn(*x)
      ais_weights += ((target_log_prob - proposal_log_prob) /
                      tf.cast(num_steps, ais_weights.dtype))
      kernel = make_kernel_fn(_make_convex_combined_log_prob_fn(iter_))
      next_state, inner_results = kernel.one_step(
          current_state, kernel_results.inner_results)
      kernel_results = AISResults(
          proposal_log_prob=proposal_log_prob,
          target_log_prob=target_log_prob,
          inner_results=inner_results,
      )
      return [iter_ + 1, ais_weights, next_state, kernel_results]

    def _bootstrap_results(init_state):
      """Creates first version of `previous_kernel_results`."""
      kernel = make_kernel_fn(_make_convex_combined_log_prob_fn(iter_=0))
      inner_results = kernel.bootstrap_results(init_state)

      convex_combined_log_prob = inner_results.accepted_results.target_log_prob
      dtype = convex_combined_log_prob.dtype.as_numpy_dtype
      shape = tf.shape(convex_combined_log_prob)
      proposal_log_prob = tf.fill(shape, dtype(np.nan),
                                  name="bootstrap_proposal_log_prob")
      target_log_prob = tf.fill(shape, dtype(np.nan),
                                name="target_target_log_prob")

      return AISResults(
          proposal_log_prob=proposal_log_prob,
          target_log_prob=target_log_prob,
          inner_results=inner_results,
      )

    previous_kernel_results = _bootstrap_results(current_state)
    inner_results = previous_kernel_results.inner_results

    ais_weights = tf.zeros(
        shape=tf.broadcast_dynamic_shape(
            tf.shape(inner_results.proposed_results.target_log_prob),
            tf.shape(inner_results.accepted_results.target_log_prob)),
        dtype=inner_results.proposed_results.target_log_prob.dtype.base_dtype)

    [_, ais_weights, current_state, kernel_results] = tf.while_loop(
        cond=lambda iter_, *args: iter_ < num_steps,
        body=_loop_body,
        loop_vars=[
            np.int32(0),  # iter_
            ais_weights,
            current_state,
            previous_kernel_results,
        ],
        parallel_iterations=parallel_iterations)

    return [current_state, ais_weights, kernel_results]
Example #33
    def posterior_mode(self, observations):
        """Compute maximum likelihood sequence of hidden states.

    When this function is provided with a sequence of observations
    `x[0], ..., x[num_steps - 1]`, it returns the sequence of hidden
    states `z[0], ..., z[num_steps - 1]`, drawn from the underlying
    Markov chain, that is most likely to yield those observations.

    It uses the [Viterbi algorithm](
    https://en.wikipedia.org/wiki/Viterbi_algorithm).

    Note: the behavior of this function is undefined if the
    `observations` argument represents impossible observations
    from the model.

    Note: if there isn't a unique most likely sequence then one
    of the equally most likely sequences is chosen.

    Args:
      observations: A tensor representing a batch of observations
      made on the hidden Markov model.  The rightmost dimensions
      of this tensor correspond to the dimensions of the
      observation distributions of the underlying Markov chain.
      The next dimension from the right indexes the steps in a
      sequence of observations from a single sample from the
      hidden Markov model.  The size of this dimension should
      match the `num_steps` parameter of the hidden Markov model
      object.  The other dimensions are the dimensions of the
      batch and these are broadcast with the hidden Markov model's
      parameters.

    Returns:
      A tensor representing the most likely sequence of hidden
      states. The rightmost dimension of this tensor will equal
      the `num_steps` parameter providing one hidden state for
      each step. The other dimensions are those of the batch.

    Raises:
      ValueError: if the `observations` tensor does not consist of
      sequences of `num_steps` observations.

    #### Examples

    ```python
    tfd = tfp.distributions

    # A simple weather model.

    # Represent a cold day with 0 and a hot day with 1.
    # Suppose the first day of a sequence has a 0.8 chance of being cold.

    initial_distribution = tfd.Categorical(probs=[0.8, 0.2])

    # Suppose a cold day has a 30% chance of being followed by a hot day
    # and a hot day has a 20% chance of being followed by a cold day.

    transition_distribution = tfd.Categorical(probs=[[0.7, 0.3],
                                                     [0.2, 0.8]])

    # Suppose additionally that on each day the temperature is
    # normally distributed with mean and standard deviation 0 and 5 on
    # a cold day and mean and standard deviation 15 and 10 on a hot day.

    observation_distribution = tfd.Normal(loc=[0., 15.], scale=[5., 10.])

    # This gives the hidden Markov model:

    model = tfd.HiddenMarkovModel(
        initial_distribution=initial_distribution,
        transition_distribution=transition_distribution,
        observation_distribution=observation_distribution,
        num_steps=7)

    # Suppose we observe gradually rising temperatures over a week:
    temps = [-2., 0., 2., 4., 6., 8., 10.]

    # We can now compute the most probable sequence of hidden states:

    model.posterior_mode(temps)

    # The result is [0 0 0 0 0 1 1] telling us that the transition
    # from "cold" to "hot" most likely happened between the
    # 5th and 6th days.
    ```
    """

        with tf.compat.v1.name_scope("posterior_mode", values=[observations]):
            with tf.control_dependencies(self._runtime_assertions):
                observation_tensor_shape = tf.shape(input=observations)

                with self._observation_shape_preconditions(
                        observation_tensor_shape):
                    observation_batch_shape = observation_tensor_shape[
                        :-1 - self._underlying_event_rank]
                    observation_event_shape = observation_tensor_shape[
                        -1 - self._underlying_event_rank:]

                    batch_shape = tf.broadcast_dynamic_shape(
                        observation_batch_shape, self.batch_shape_tensor())
                    log_init = tf.broadcast_to(
                        self._log_init,
                        tf.concat([batch_shape, [self._num_states]], axis=0))

                    observations = tf.broadcast_to(
                        observations,
                        tf.concat([batch_shape, observation_event_shape],
                                  axis=0))
                    observation_rank = tf.rank(observations)
                    underlying_event_rank = self._underlying_event_rank
                    observations = util.move_dimension(
                        observations,
                        observation_rank - underlying_event_rank - 1, 0)

                    # We need to compute the probability of each observation for
                    # each possible state.
                    # This requires inserting an extra index just before the
                    # observation event indices that will be broadcast with the
                    # last batch index in `observation_distribution`.
                    observations = tf.expand_dims(
                        observations, observation_rank - underlying_event_rank)
                    observation_log_probs = self._observation_distribution.log_prob(
                        observations)

                    log_prob = log_init + observation_log_probs[0]

                    if self._num_steps == 1:
                        most_likely_end = tf.argmax(input=log_prob, axis=-1)
                        return most_likely_end[..., tf.newaxis]

                    def forward_step(previous_step_pair, log_prob_observation):
                        log_prob_previous = previous_step_pair[0]
                        log_prob = (log_prob_previous[..., tf.newaxis] +
                                    self._log_trans +
                                    log_prob_observation[..., tf.newaxis, :])
                        most_likely_given_successor = tf.argmax(input=log_prob,
                                                                axis=-2)
                        max_log_p_given_successor = tf.reduce_max(
                            input_tensor=log_prob, axis=-2)
                        return (max_log_p_given_successor,
                                most_likely_given_successor)

                    forward_log_probs, all_most_likely_given_successor = tf.scan(
                        forward_step,
                        observation_log_probs[1:],
                        initializer=(log_prob,
                                     tf.zeros(tf.shape(input=log_init),
                                              dtype=tf.int64)),
                        name="forward_log_probs")

                    most_likely_end = tf.argmax(input=forward_log_probs[-1],
                                                axis=-1)

                    # We require the operation that gives C from A and B where
                    # C[i...j] = A[i...j, B[i...j]]
                    # and A = most_likely_given_successor
                    #     B = most_likely_successor.
                    # tf.gather requires indices of known shape so instead we use
                    # reduction with tf.one_hot(B) to pick out elements from B
                    def backward_step(most_likely_successor,
                                      most_likely_given_successor):
                        return tf.reduce_sum(
                            input_tensor=(most_likely_given_successor *
                                          tf.one_hot(most_likely_successor,
                                                     self._num_states,
                                                     dtype=tf.int64)),
                            axis=-1)

                    backward_scan = tf.scan(backward_step,
                                            all_most_likely_given_successor,
                                            most_likely_end,
                                            reverse=True)
                    most_likely_sequences = tf.concat(
                        [backward_scan, [most_likely_end]], axis=0)
                    return util.move_dimension(most_likely_sequences, 0, -1)
Example #34
def sample_annealed_importance_chain(num_steps,
                                     proposal_log_prob_fn,
                                     target_log_prob_fn,
                                     current_state,
                                     make_kernel_fn,
                                     parallel_iterations=10,
                                     name=None):
    """Runs annealed importance sampling (AIS) to estimate normalizing constants.

  This function uses an MCMC transition operator (e.g., Hamiltonian Monte Carlo)
  to sample from a series of distributions that slowly interpolates between
  an initial "proposal" distribution:

  `exp(proposal_log_prob_fn(x) - proposal_log_normalizer)`

  and the target distribution:

  `exp(target_log_prob_fn(x) - target_log_normalizer)`,

  accumulating importance weights along the way. The product of these
  importance weights gives an unbiased estimate of the ratio of the
  normalizing constants of the initial distribution and the target
  distribution:

  `E[exp(ais_weights)] = exp(target_log_normalizer - proposal_log_normalizer)`.

  Note: When running in graph mode, `proposal_log_prob_fn` and
  `target_log_prob_fn` are called exactly three times (although this may be
  reduced to two times in the future).

  Args:
    num_steps: Integer number of Markov chain updates to run. More
      iterations means more expense, but smoother annealing between q
      and p, which in turn means exponentially lower variance for the
      normalizing constant estimator.
    proposal_log_prob_fn: Python callable that returns the log density of the
      initial distribution.
    target_log_prob_fn: Python callable which takes an argument like
      `current_state` (or `*current_state` if it's a list) and returns its
      (possibly unnormalized) log-density under the target distribution.
    current_state: `Tensor` or Python `list` of `Tensor`s representing the
      current state(s) of the Markov chain(s). The first `r` dimensions index
      independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`.
    make_kernel_fn: Python `callable` which returns a `TransitionKernel`-like
      object. Must take one argument representing the `TransitionKernel`'s
      `target_log_prob_fn`. The `target_log_prob_fn` argument represents the
      `TransitionKernel`'s target log distribution.  Note:
      `sample_annealed_importance_chain` creates a new `target_log_prob_fn`
      which is an interpolation between the supplied `target_log_prob_fn` and
      `proposal_log_prob_fn`; it is this interpolated function which is used as
      an argument to `make_kernel_fn`.
    parallel_iterations: The number of iterations allowed to run in parallel.
        It must be a positive integer. See `tf.while_loop` for more details.
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., "sample_annealed_importance_chain").

  Returns:
    next_state: `Tensor` or Python list of `Tensor`s representing the
      state(s) of the Markov chain(s) at the final iteration. Has same shape as
      input `current_state`.
    ais_weights: Tensor with the estimated weight(s). Has shape matching
      `target_log_prob_fn(current_state)`.
    kernel_results: `collections.namedtuple` of internal calculations used to
      advance the chain.

  #### Examples

  ##### Estimate the normalizing constant of a log-gamma distribution.

  ```python
  tfd = tfp.distributions

  # Run 100 AIS chains in parallel
  num_chains = 100
  dims = 20
  dtype = np.float32

  proposal = tfd.MultivariateNormalDiag(
     loc=tf.zeros([dims], dtype=dtype))

  target = tfd.TransformedDistribution(
    distribution=tfd.Gamma(concentration=dtype(2),
                           rate=dtype(3)),
    bijector=tfp.bijectors.Invert(tfp.bijectors.Exp()),
    event_shape=[dims])

  chains_state, ais_weights, kernels_results = (
      tfp.mcmc.sample_annealed_importance_chain(
          num_steps=1000,
          proposal_log_prob_fn=proposal.log_prob,
          target_log_prob_fn=target.log_prob,
          current_state=proposal.sample(num_chains),
          make_kernel_fn=lambda tlp_fn: tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=tlp_fn,
            step_size=0.2,
            num_leapfrog_steps=2)))

  log_estimated_normalizer = (tf.reduce_logsumexp(ais_weights)
                              - np.log(num_chains))
  log_true_normalizer = tf.lgamma(2.) - 2. * tf.log(3.)
  ```

  ##### Estimate marginal likelihood of a Bayesian regression model.

  ```python
  tfd = tfp.distributions

  def make_prior(dims, dtype):
    return tfd.MultivariateNormalDiag(
        loc=tf.zeros(dims, dtype))

  def make_likelihood(weights, x):
    return tfd.MultivariateNormalDiag(
        loc=tf.tensordot(weights, x, axes=[[0], [-1]]))

  # Run 100 AIS chains in parallel
  num_chains = 100
  dims = 10
  dtype = np.float32

  # Make training data.
  x = np.random.randn(num_chains, dims).astype(dtype)
  true_weights = np.random.randn(dims).astype(dtype)
  y = np.dot(x, true_weights) + np.random.randn(num_chains)

  # Setup model.
  prior = make_prior(dims, dtype)
  def target_log_prob_fn(weights):
    return prior.log_prob(weights) + make_likelihood(weights, x).log_prob(y)

  proposal = tfd.MultivariateNormalDiag(
      loc=tf.zeros(dims, dtype))

  weight_samples, ais_weights, kernel_results = (
      tfp.mcmc.sample_annealed_importance_chain(
        num_steps=1000,
        proposal_log_prob_fn=proposal.log_prob,
        target_log_prob_fn=target_log_prob_fn,
        current_state=tf.zeros([num_chains, dims], dtype),
        make_kernel_fn=lambda tlp_fn: tfp.mcmc.HamiltonianMonteCarlo(
          target_log_prob_fn=tlp_fn,
          step_size=0.1,
          num_leapfrog_steps=2)))
  log_normalizer_estimate = (tf.reduce_logsumexp(ais_weights)
                             - np.log(num_chains))
  ```

  """
    with tf.name_scope(name, "sample_annealed_importance_chain",
                       [num_steps, current_state]):
        num_steps = tf.convert_to_tensor(num_steps,
                                         dtype=tf.int32,
                                         name="num_steps")
        if mcmc_util.is_list_like(current_state):
            current_state = [
                tf.convert_to_tensor(s, name="current_state")
                for s in current_state
            ]
        else:
            current_state = tf.convert_to_tensor(current_state,
                                                 name="current_state")

        def _make_convex_combined_log_prob_fn(iter_):
            def _fn(*args):
                p = tf.identity(proposal_log_prob_fn(*args),
                                name="proposal_log_prob")
                t = tf.identity(target_log_prob_fn(*args),
                                name="target_log_prob")
                dtype = p.dtype.base_dtype
                beta = tf.cast(iter_ + 1, dtype) / tf.cast(num_steps, dtype)
                return tf.identity(beta * t + (1. - beta) * p,
                                   name="convex_combined_log_prob")

            return _fn

        def _loop_body(iter_, ais_weights, current_state, kernel_results):
            """Closure which implements `tf.while_loop` body."""
            x = (current_state
                 if mcmc_util.is_list_like(current_state) else [current_state])
            proposal_log_prob = proposal_log_prob_fn(*x)
            target_log_prob = target_log_prob_fn(*x)
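            # Accumulate the AIS log-weight: each of the `num_steps` annealing
            # stages contributes (log target - log proposal) / num_steps,
            # evaluated at the state entering that stage.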
            ais_weights += ((target_log_prob - proposal_log_prob) /
                            tf.cast(num_steps, ais_weights.dtype))
            kernel = make_kernel_fn(_make_convex_combined_log_prob_fn(iter_))
            next_state, inner_results = kernel.one_step(
                current_state, kernel_results.inner_results)
            kernel_results = AISResults(
                proposal_log_prob=proposal_log_prob,
                target_log_prob=target_log_prob,
                inner_results=inner_results,
            )
            return [iter_ + 1, ais_weights, next_state, kernel_results]

        def _bootstrap_results(init_state):
            """Creates first version of `previous_kernel_results`."""
            kernel = make_kernel_fn(_make_convex_combined_log_prob_fn(iter_=0))
            inner_results = kernel.bootstrap_results(init_state)

            convex_combined_log_prob = inner_results.accepted_results.target_log_prob
            dtype = convex_combined_log_prob.dtype.as_numpy_dtype
            shape = tf.shape(convex_combined_log_prob)
            proposal_log_prob = tf.fill(shape,
                                        dtype(np.nan),
                                        name="bootstrap_proposal_log_prob")
            target_log_prob = tf.fill(shape,
                                      dtype(np.nan),
                                      name="target_target_log_prob")

            return AISResults(
                proposal_log_prob=proposal_log_prob,
                target_log_prob=target_log_prob,
                inner_results=inner_results,
            )

        previous_kernel_results = _bootstrap_results(current_state)
        inner_results = previous_kernel_results.inner_results

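        # One AIS weight per chain: initialize at zero with the broadcast shape
        # of the proposed/accepted target_log_prob from the inner kernel.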
        ais_weights = tf.zeros(
            shape=tf.broadcast_dynamic_shape(
                tf.shape(inner_results.proposed_results.target_log_prob),
                tf.shape(inner_results.accepted_results.target_log_prob)),
            dtype=(inner_results.proposed_results.target_log_prob
                   .dtype.base_dtype))

        [_, ais_weights, current_state, kernel_results] = tf.while_loop(
            cond=lambda iter_, *args: iter_ < num_steps,
            body=_loop_body,
            loop_vars=[
                np.int32(0),  # iter_
                ais_weights,
                current_state,
                previous_kernel_results,
            ],
            parallel_iterations=parallel_iterations)

        return [current_state, ais_weights, kernel_results]
示例#35
0
def lu_solve(lower_upper, perm, rhs, validate_args=False, name=None):
    """Solves systems of linear eqns `A X = RHS`, given LU factorizations.

  Note: this function does not verify that the implied matrix is actually
  invertible, nor is this condition checked even when `validate_args=True`.

  Args:
    lower_upper: `lu` as returned by `tf.linalg.lu`, i.e., if
      `matmul(P, matmul(L, U)) = X` then `lower_upper = L + U - eye`.
    perm: `p` as returned by `tf.linalg.lu`, i.e., if
      `matmul(P, matmul(L, U)) = X` then `perm = argmax(P)`.
    rhs: Matrix-shaped float `Tensor` representing targets for which to solve;
      `A X = RHS`. To handle vector cases, use:
      `lu_solve(..., rhs[..., tf.newaxis])[..., 0]`.
    validate_args: Python `bool` indicating whether arguments should be checked
      for correctness. Note: this function does not verify the implied matrix is
      actually invertible, even when `validate_args=True`.
      Default value: `False` (i.e., don't validate arguments).
    name: Python `str` name given to ops managed by this object.
      Default value: `None` (i.e., "lu_solve").

  Returns:
    x: The `X` in `A @ X = RHS`.

  #### Examples

  ```python
  import numpy as np
  import tensorflow as tf
  import tensorflow_probability as tfp

  x = [[[1., 2],
        [3, 4]],
       [[7, 8],
        [3, 4]]]
  inv_x = tfp.math.lu_solve(*tf.linalg.lu(x), rhs=tf.eye(2))
  tf.assert_near(tf.matrix_inverse(x), inv_x)
  # ==> True
  ```
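
  To solve for a single right-hand-side vector rather than a matrix, add a
  trailing dimension before the call and strip it afterwards (a minimal
  sketch; `b` is an illustrative vector, reusing `x` from the example above):

  ```python
  b = tf.constant([1., 0.])
  # For each 2x2 matrix in the batch, `sol[i]` solves `x[i] @ sol[i] = b`.
  sol = tfp.math.lu_solve(*tf.linalg.lu(x), rhs=b[..., tf.newaxis])[..., 0]
  ```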

  """

    with tf.name_scope(name, 'lu_solve', [lower_upper, perm, rhs]):
        lower_upper = tf.convert_to_tensor(value=lower_upper,
                                           dtype_hint=tf.float32,
                                           name='lower_upper')
        perm = tf.convert_to_tensor(value=perm,
                                    dtype_hint=tf.int32,
                                    name='perm')
        rhs = tf.convert_to_tensor(value=rhs,
                                   dtype_hint=lower_upper.dtype,
                                   name='rhs')

        assertions = _lu_solve_assertions(lower_upper, perm, rhs,
                                          validate_args)
        if assertions:
            with tf.control_dependencies(assertions):
                lower_upper = tf.identity(lower_upper)
                perm = tf.identity(perm)
                rhs = tf.identity(rhs)

        if rhs.shape.ndims == 2 and perm.shape.ndims == 1:
            # Both rhs and perm have scalar batch_shape.
            permuted_rhs = tf.gather(rhs, perm, axis=-2)
        else:
            # Either rhs or perm have non-scalar batch_shape or we can't determine
            # this information statically.
            rhs_shape = tf.shape(input=rhs)
            broadcast_batch_shape = tf.broadcast_dynamic_shape(
                rhs_shape[:-2],
                tf.shape(input=perm)[:-1])
            d, m = rhs_shape[-2], rhs_shape[-1]
            rhs_broadcast_shape = tf.concat([broadcast_batch_shape, [d, m]],
                                            axis=0)

            # Tile out rhs.
            broadcast_rhs = tf.broadcast_to(rhs, rhs_broadcast_shape)
            broadcast_rhs = tf.reshape(broadcast_rhs, [-1, d, m])

            # Tile out perm and add batch indices.
            broadcast_perm = tf.broadcast_to(perm, rhs_broadcast_shape[:-1])
            broadcast_perm = tf.reshape(broadcast_perm, [-1, d])
            broadcast_batch_size = tf.reduce_prod(
                input_tensor=broadcast_batch_shape)
            broadcast_batch_indices = tf.broadcast_to(
                tf.range(broadcast_batch_size)[:, tf.newaxis],
                [broadcast_batch_size, d])
            broadcast_perm = tf.stack(
                [broadcast_batch_indices, broadcast_perm], axis=-1)

            permuted_rhs = tf.gather_nd(broadcast_rhs, broadcast_perm)
            permuted_rhs = tf.reshape(permuted_rhs, rhs_broadcast_shape)

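        # Recover the unit-diagonal lower-triangular factor L: keep the lower
        # triangle of `lower_upper` and overwrite its diagonal with ones.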
        lower = tf.linalg.set_diag(
            tf.linalg.band_part(lower_upper, num_lower=-1, num_upper=0),
            tf.ones(tf.shape(input=lower_upper)[:-1], dtype=lower_upper.dtype))
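        # Two triangular solves: forward-substitute `L @ y = permuted_rhs`,
        # then back-substitute `U @ x = y` (only the upper triangle of
        # `lower_upper` is read in the second solve).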
        return linear_operator_util.matrix_triangular_solve_with_broadcast(
            lower_upper,  # Only upper is accessed.
            linear_operator_util.matrix_triangular_solve_with_broadcast(
                lower, permuted_rhs),
            lower=False)
示例#36
0
 def sample_n(self, n, seed=None):
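     # Location-scale sampling: draw standard normals with the broadcast batch
     # shape of `loc` and `scale`, then shift and rescale.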
     shape = tf.concat(
         [[n], tf.broadcast_dynamic_shape(tf.shape(self.loc),
                                          tf.shape(self.scale))],
         axis=0)
     sampled = tf.random.normal(
         shape=shape, mean=0., stddev=1., dtype=tf.float32, seed=seed)
     return sampled * self.scale + self.loc
示例#37
0
 def _batch_shape(self):
     return tf.broadcast_dynamic_shape(tf.shape(self.loc),
                                       tf.shape(self.scale))
示例#38
0
 def _batch_shape_tensor(self):
     return tf.broadcast_dynamic_shape(
         self.kernel.batch_shape_tensor(),
         tf.shape(input=self.scale_diag)[:-self.kernel.feature_ndims])
示例#39
0
 def _batch_shape_tensor(self):
   return tf.broadcast_dynamic_shape(
       tf.shape(self.total_count), tf.shape(self.probs))
示例#40
0
 def _batch_shape_tensor(self):
   return tf.broadcast_dynamic_shape(
       tf.shape(input=self.concentration), tf.shape(input=self.rate))
示例#41
0
 def _batch_shape_tensor(self):
     with self._name_scope("batch_shape_tensor"):
         return tf.broadcast_dynamic_shape(tf.shape(self.amplitude),
                                           tf.shape(self.length_scale))
示例#42
0
 def _batch_shape_tensor(self):
     return tf.broadcast_dynamic_shape(tf.shape(input=self.loc),
                                       tf.shape(input=self.scale))
 def _set_event_shape(shape, shape_tensor):
     if event_shape is None:
         return shape, shape_tensor
     return (tf.broadcast_static_shape(event_shape, shape),
             tf.broadcast_dynamic_shape(event_shape_tensor,
                                        shape_tensor))
示例#44
0
    def posterior_marginals(self, observations):
        """Compute marginal posterior distribution for each state.

    This function computes, for each time step, the marginal
    conditional probability that the hidden Markov model was in
    each possible state given the observations that were made
    at each time step.
    So if the hidden states are `z[0],...,z[num_steps - 1]` and
    the observations are `x[0], ..., x[num_steps - 1]`, then
    this function computes `P(z[i] | x[0], ..., x[num_steps - 1])`
    for all `i` from `0` to `num_steps - 1`.

    This operation is sometimes called smoothing. It uses a form
    of the forward-backward algorithm.

    Note: the behavior of this function is undefined if the
    `observations` argument represents impossible observations
    from the model.

    Args:
      observations: A tensor representing a batch of observations
        made on the hidden Markov model.  The rightmost dimension
        of this tensor gives the steps in a sequence of observations
        from a single sample from the hidden Markov model. The size
        of this dimension should match the `num_steps` parameter
        of the hidden Markov model object. The other dimensions are
        the dimensions of the batch and these are broadcast with
        the hidden Markov model's parameters.

    Returns:
      A `Categorical` distribution object representing the marginal
      probability of the hidden Markov model being in each state at
      each step. The rightmost dimension of the `Categorical`
      distribution's batch will equal the `num_steps` parameter,
      providing one marginal distribution for each step. The
      other dimensions are the dimensions corresponding to the
      batch of observations.

    Raises:
      ValueError: if rightmost dimension of `observations` does not
        have size `num_steps`.
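
    #### Examples

    A minimal sketch of smoothing a two-state model; the parameter values
    below are illustrative assumptions, not taken from elsewhere in this
    module.

    ```python
    tfd = tfp.distributions

    hmm = tfd.HiddenMarkovModel(
        initial_distribution=tfd.Categorical(probs=[0.8, 0.2]),
        transition_distribution=tfd.Categorical(probs=[[0.7, 0.3],
                                                       [0.2, 0.8]]),
        observation_distribution=tfd.Normal(loc=[0., 3.], scale=[0.5, 0.5]),
        num_steps=7)

    observations = tf.constant([-0.2, 0.1, 2.9, 3.1, 2.8, 0.0, -0.1])

    # A `Categorical` with batch shape [7]: one marginal distribution over the
    # two hidden states for each of the seven observation steps.
    posterior = hmm.posterior_marginals(observations)
    posterior.logits  # shape [7, 2]
    ```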
    """

        with tf.compat.v1.name_scope("posterior_marginals",
                                     values=[observations]):
            with tf.control_dependencies(self._runtime_assertions):
                observation_tensor_shape = tf.shape(input=observations)

                with self._observation_shape_preconditions(
                        observation_tensor_shape):
                    observation_batch_shape = observation_tensor_shape[
                        :-1 - self._underlying_event_rank]
                    observation_event_shape = observation_tensor_shape[
                        -1 - self._underlying_event_rank:]

                    batch_shape = tf.broadcast_dynamic_shape(
                        observation_batch_shape, self.batch_shape_tensor())
                    log_init = tf.broadcast_to(
                        self._log_init,
                        tf.concat([batch_shape, [self._num_states]], axis=0))
                    log_transition = self._log_trans

                    observations = tf.broadcast_to(
                        observations,
                        tf.concat([batch_shape, observation_event_shape],
                                  axis=0))
                    observation_rank = tf.rank(observations)
                    underlying_event_rank = self._underlying_event_rank
                    observations = util.move_dimension(
                        observations,
                        observation_rank - underlying_event_rank - 1, 0)
                    observations = tf.expand_dims(
                        observations, observation_rank - underlying_event_rank)
                    observation_log_probs = self._observation_distribution.log_prob(
                        observations)

                    log_adjoint_prob = tf.zeros_like(log_init)

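                    # Forward pass: recursively accumulate
                    # log P(z[t], x[0], ..., x[t]) for each state, one scan
                    # step per observation.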
                    def forward_step(log_previous_step, log_prob_observation):
                        return _log_vector_matrix(
                            log_previous_step,
                            log_transition) + log_prob_observation

                    log_prob = log_init + observation_log_probs[0]

                    forward_log_probs = tf.scan(forward_step,
                                                observation_log_probs[1:],
                                                initializer=log_prob,
                                                name="forward_log_probs")

                    forward_log_probs = tf.concat(
                        [[log_prob], forward_log_probs], axis=0)

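                    # Backward pass: accumulate
                    # log P(x[t+1], ..., x[T-1] | z[t]) by running the same
                    # recursion in reverse over the observation log-probs.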
                    def backward_step(log_previous_step, log_prob_observation):
                        return _log_matrix_vector(
                            log_transition,
                            log_prob_observation + log_previous_step)

                    backward_log_adjoint_probs = tf.scan(
                        backward_step,
                        observation_log_probs[1:],
                        initializer=log_adjoint_prob,
                        reverse=True,
                        name="backward_log_adjoint_probs")

                    total_log_prob = tf.reduce_logsumexp(
                        input_tensor=forward_log_probs[-1], axis=-1)

                    backward_log_adjoint_probs = tf.concat(
                        [backward_log_adjoint_probs, [log_adjoint_prob]],
                        axis=0)

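                    # Combine the passes: forward + backward gives
                    # log P(z[t], x[0], ..., x[T-1]); subtracting the total
                    # log-likelihood yields the posterior marginals.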
                    log_likelihoods = forward_log_probs + backward_log_adjoint_probs

                    marginal_log_probs = util.move_dimension(
                        log_likelihoods - total_log_prob[..., tf.newaxis], 0,
                        -2)

                    return categorical.Categorical(logits=marginal_log_probs)
示例#45
0
 def _batch_shape_tensor(self):
   return tf.broadcast_dynamic_shape(
       self.distribution.batch_shape_tensor(),
       tf.shape(self.mixture_distribution.logits))[:-1]
示例#46
0
 def _batch_shape_tensor(self):
     return tf.broadcast_dynamic_shape(tf.shape(self.loc),
                                       tf.shape(self.concentration))
示例#47
0
def _dynamic_broadcast_shape_from_tensors(*tensors):
    shape = tf.shape(tensors[0])
    for t in tensors[1:]:
        shape = tf.broadcast_dynamic_shape(shape, tf.shape(t))
    return shape
示例#48
0
 def _batch_shape_tensor(self):
   return tf.broadcast_dynamic_shape(tf.shape(self.loc), tf.shape(self.scale))
示例#49
0
 def _batch_shape_tensor(self):
   return tf.broadcast_dynamic_shape(
       tf.shape(self.concentration), tf.shape(self.rate))
示例#50
0
 def _batch_shape(self):
     return tf.broadcast_dynamic_shape(tf.shape(self.mean),
                                       tf.shape(self.std))
示例#51
0
 def _batch_shape_tensor(self):
     return tf.broadcast_dynamic_shape(tf.shape(input=self.temperature),
                                       tf.shape(input=self.logits)[:-1])
示例#52
0
 def _batch_shape(self):
     return tf.broadcast_dynamic_shape(tf.shape(self.alpha),
                                       tf.shape(self.beta))
示例#53
0
 def _batch_shape_tensor(self):
     return tf.broadcast_dynamic_shape(
         tf.shape(self.df), self.scale_operator.batch_shape_tensor())