def _batch_shape_tensor(self):
  with tf.control_dependencies(self._runtime_assertions):
    return tf.broadcast_dynamic_shape(
        self._initial_distribution.batch_shape_tensor(),
        tf.broadcast_dynamic_shape(
            self._transition_distribution.batch_shape_tensor()[:-1],
            self._observation_distribution.batch_shape_tensor()[:-1]))
def _batch_shape_tensor(self):
  with self._name_scope("batch_shape_tensor"):
    return tf.broadcast_dynamic_shape(
        tf.broadcast_dynamic_shape(
            tf.shape(self.amplitude),
            tf.shape(self.length_scale)),
        tf.shape(self.period))
def _itemwise_error_rate(
    total_error_rate, param_tensors, sample_tensor=None, name=None):
  with tf.name_scope(name, "itemwise_error_rate",
                     [total_error_rate, param_tensors, sample_tensor]):
    result_shape = [1]
    for p_tensor in param_tensors:
      result_shape = tf.broadcast_dynamic_shape(
          tf.shape(p_tensor), result_shape)
    if sample_tensor is not None:
      result_shape = tf.broadcast_dynamic_shape(
          tf.shape(sample_tensor)[1:], result_shape)
    num_items = tf.reduce_prod(result_shape)
    return total_error_rate / tf.cast(num_items, dtype=total_error_rate.dtype)
def _validate_observation_data(self):
  # Check that observation index points and observation counts broadcast.
  assertions = []
  msg = ('Observation index point and observation counts are not '
         'broadcastable.')
  ndims = self.kernel.feature_ndims
  if (self.observation_index_points.shape[:-ndims].is_fully_defined() and
      self.observations.shape.is_fully_defined()):
    index_point_count = self.observation_index_points.shape[:-ndims]
    observation_count = self.observations.shape
    try:
      tf.broadcast_static_shape(index_point_count, observation_count)
    except ValueError:
      # Re-raise with our own more contextual error message.
      raise ValueError(msg[:-1] + ': {} and {}, respectively.'.format(
          index_point_count, observation_count))
  else:
    if self._validate_args:
      # Instead of an assertion of broadcastability, we simply append an op
      # to dynamically broadcast the two shapes; if this fails, the shapes
      # must not be broadcastable.
      broadcast_op = tf.broadcast_dynamic_shape(
          tf.shape(self.observation_index_points)[:-ndims],
          tf.shape(self.observations),
          name='check_that_index_points_and_observation_shapes_broadcast')
      assertions.append(broadcast_op)
  return assertions
def _apply_noisy_update(self, mom, grad, var):
  # Compute and apply the gradient update following
  # preconditioned Langevin dynamics.
  stddev = tf.where(
      tf.squeeze(self._counter > self._burnin),
      tf.cast(tf.rsqrt(self._learning_rate), grad.dtype),
      tf.zeros([], grad.dtype))
  # Keep an exponentially weighted moving average of squared gradients.
  # Not thread safe.
  decay_tensor = tf.cast(self._decay_tensor, grad.dtype)
  new_mom = decay_tensor * mom + (1. - decay_tensor) * tf.square(grad)
  preconditioner = tf.rsqrt(
      new_mom + tf.cast(self._diagonal_bias, grad.dtype))

  # Compute gradients of the preconditioner.
  _, preconditioner_grads = diag_jacobian(
      xs=var,
      ys=preconditioner,
      parallel_iterations=self._parallel_iterations)

  mean = 0.5 * (preconditioner * grad * tf.cast(self._data_size, grad.dtype)
                - preconditioner_grads[0])
  stddev *= tf.sqrt(preconditioner)
  result_shape = tf.broadcast_dynamic_shape(tf.shape(mean), tf.shape(stddev))

  with tf.control_dependencies([tf.assign(mom, new_mom)]):
    return tf.random_normal(shape=result_shape,
                            mean=mean,
                            stddev=stddev,
                            dtype=grad.dtype)
def _cdf(self, x):
  broadcast_shape = tf.broadcast_dynamic_shape(
      tf.shape(x), self.batch_shape_tensor())
  zeros = tf.zeros(broadcast_shape, dtype=self.dtype)
  ones = tf.ones(broadcast_shape, dtype=self.dtype)
  broadcasted_x = x * ones
  result_if_not_big = tf.where(
      x < self.low, zeros, (broadcasted_x - self.low) / self.range())
  return tf.where(x >= self.high, ones, result_if_not_big)
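# Illustrative sketch (not part of the original example): the same piecewise
# uniform-CDF logic with hypothetical scalar parameters low=0 and high=2.
# Values below `low` map to 0, values at or above `high` map to 1, and values
# in between map to (x - low) / (high - low).
import tensorflow as tf

low, high = tf.constant(0.), tf.constant(2.)
x = tf.constant([-1., 1., 3.])
cdf = tf.where(x >= high,
               tf.ones_like(x),
               tf.where(x < low, tf.zeros_like(x), (x - low) / (high - low)))
# cdf ==> [0.0, 0.5, 1.0]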
def check(t):
  samples_batch_shape = tf.shape(samples)[1:]
  broadcasted_batch_shape = tf.broadcast_dynamic_shape(
      samples_batch_shape, tf.shape(t))
  # This rank check ensures that I don't get a wrong answer from the
  # _shapes_ broadcasting against each other.
  samples_batch_ndims = tf.size(samples_batch_shape)
  ge = tf.assert_greater_equal(samples_batch_ndims, tf.rank(t))
  eq = tf.assert_equal(samples_batch_shape, broadcasted_batch_shape)
  return ge, eq
def batch_shape_tensor(self):
  """Runtime batch shape of models represented by this component.

  Returns:
    batch_shape: `int` `Tensor` giving the broadcast batch shape of all model
      parameters. This should match the batch shape of derived state space
      models, i.e., `self.make_state_space_model(...).batch_shape_tensor()`.
  """
  batch_shape = tf.constant([], dtype=tf.int32)
  for param in self.parameters:
    batch_shape = tf.broadcast_dynamic_shape(
        batch_shape, param.prior.batch_shape_tensor())
  return batch_shape
def determine_batch_event_shapes(grid, endpoint_affine):
  """Helper to infer batch_shape and event_shape."""
  with tf.name_scope(name="determine_batch_event_shapes"):
    # grid  # shape: [B, k, q]
    # endpoint_affine  # len=k, shape: [B, d, d]
    batch_shape = grid.shape[:-2]
    batch_shape_tensor = tf.shape(grid)[:-2]
    event_shape = None
    event_shape_tensor = None

    def _set_event_shape(shape, shape_tensor):
      if event_shape is None:
        return shape, shape_tensor
      return (tf.broadcast_static_shape(event_shape, shape),
              tf.broadcast_dynamic_shape(event_shape_tensor, shape_tensor))

    for aff in endpoint_affine:
      if aff.shift is not None:
        batch_shape = tf.broadcast_static_shape(batch_shape,
                                                aff.shift.shape[:-1])
        batch_shape_tensor = tf.broadcast_dynamic_shape(
            batch_shape_tensor, tf.shape(aff.shift)[:-1])
        event_shape, event_shape_tensor = _set_event_shape(
            aff.shift.shape[-1:], tf.shape(aff.shift)[-1:])

      if aff.scale is not None:
        batch_shape = tf.broadcast_static_shape(batch_shape,
                                                aff.scale.batch_shape)
        batch_shape_tensor = tf.broadcast_dynamic_shape(
            batch_shape_tensor, aff.scale.batch_shape_tensor())
        event_shape, event_shape_tensor = _set_event_shape(
            tf.TensorShape([aff.scale.range_dimension]),
            aff.scale.range_dimension_tensor()[tf.newaxis])

    return batch_shape, batch_shape_tensor, event_shape, event_shape_tensor
def _log_prob(self, value):
  with tf.control_dependencies(self._runtime_assertions):
    # The argument `value` is a tensor of sequences of observations.
    # `observation_batch_shape` is the shape of that tensor with the
    # sequence part removed.
    # `observation_batch_shape` is then broadcast to the full batch shape
    # to give the `working_shape` that defines the shape of the result.
    observation_batch_shape = tf.shape(
        value)[:-1 - self._underlying_event_rank]
    # value :: observation_batch_shape num_steps observation_event_shape
    working_shape = tf.broadcast_dynamic_shape(observation_batch_shape,
                                               self.batch_shape_tensor())
    log_init = tf.broadcast_to(
        self._log_init,
        tf.concat([working_shape, [self._num_states]], axis=0))
    # log_init :: working_shape num_states
    log_transition = self._log_trans

    # `observation_event_shape` is the shape of each sequence of observations
    # emitted by the model.
    observation_event_shape = tf.shape(
        value)[-1 - self._underlying_event_rank:]
    working_obs = tf.broadcast_to(
        value,
        tf.concat([working_shape, observation_event_shape], axis=0))
    # working_obs :: working_shape observation_event_shape
    r = self._underlying_event_rank

    # Move index into sequence of observations to front so we can apply
    # tf.foldl
    working_obs = util.move_dimension(working_obs, -1 - r, 0)[..., tf.newaxis]
    # working_obs :: num_steps working_shape underlying_event_shape
    observation_probs = (
        self._observation_distribution.log_prob(working_obs))

    def forward_step(log_prev_step, log_observation):
      return _log_vector_matrix(log_prev_step,
                                log_transition) + log_observation

    fwd_prob = tf.foldl(forward_step, observation_probs, initializer=log_init)
    # fwd_prob :: working_shape num_states

    log_prob = tf.reduce_logsumexp(fwd_prob, axis=-1)
    # log_prob :: working_shape

    return log_prob
def broadcast_batch_shape(distributions):
  """Get broadcast batch shape from distributions, statically if possible."""
  # Static case.
  batch_shape = distributions[0].batch_shape
  for distribution in distributions:
    batch_shape = tf.broadcast_static_shape(batch_shape,
                                            distribution.batch_shape)
  if batch_shape.is_fully_defined():
    return batch_shape.as_list()

  # Fallback on dynamic.
  batch_shape = distributions[0].batch_shape_tensor()
  for distribution in distributions:
    batch_shape = tf.broadcast_dynamic_shape(
        batch_shape, distribution.batch_shape_tensor())
  return tf.convert_to_tensor(batch_shape)
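# Illustrative usage sketch (not from the original source); assumes the
# `broadcast_batch_shape` helper above is in scope and TensorFlow Probability
# is installed. Batch shapes [3, 1] and [4] broadcast to [3, 4]; both are
# statically known, so the helper returns a plain Python list.
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
d1 = tfd.Normal(loc=tf.zeros([3, 1]), scale=1.)   # batch_shape [3, 1]
d2 = tfd.Normal(loc=0., scale=tf.ones([4]))       # batch_shape [4]
print(broadcast_batch_shape([d1, d2]))            # ==> [3, 4]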
def random_rayleigh(shape, scale=None, dtype=tf.float32, seed=None, name=None):
  """Generates `Tensor` of positive reals drawn from a Rayleigh distribution.

  The probability density function of a Rayleigh distribution with `scale`
  parameter is given by:

  ```none
  f(x) = x scale**-2 exp(-x**2 0.5 scale**-2)
  ```

  For more details, see [Rayleigh distribution](
  https://en.wikipedia.org/wiki/Rayleigh_distribution)

  Args:
    shape: Vector-shaped, `int` `Tensor` representing shape of output.
    scale: (Optional) Positive `float` `Tensor` representing `Rayleigh` scale.
      Default value: `None` (i.e., `scale = 1.`).
    dtype: (Optional) TF `dtype` representing `dtype` of output.
      Default value: `tf.float32`.
    seed: (Optional) Python integer to seed the random number generator.
      Default value: `None` (i.e., no seed).
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., 'random_rayleigh').

  Returns:
    rayleigh: `Tensor` with specified `shape` and `dtype` consisting of
      positive real values drawn from a Rayleigh distribution with specified
      `scale`.
  """
  with tf.name_scope(name, 'random_rayleigh', [shape, scale, seed]):
    if scale is not None:
      # It's important to expand the shape to match scale's, otherwise we won't
      # have independent draws.
      scale = tf.convert_to_tensor(scale, dtype=dtype, name='scale')
      shape = tf.broadcast_dynamic_shape(shape, tf.shape(scale))
    x = tf.sqrt(-2. * tf.log(tf.random_uniform(
        shape, minval=0, maxval=1, dtype=dtype, seed=seed)))
    if scale is None:
      return x
    return x * scale
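# Illustrative usage sketch (not from the original source); assumes the
# TF1-style `random_rayleigh` above is in scope. The requested `shape`
# broadcasts against `scale`'s shape, so each scale entry gets its own
# independent draw.
import tensorflow as tf

scale = tf.constant([[1., 2., 3.],
                     [4., 5., 6.]])                # shape [2, 3]
samples = random_rayleigh(shape=tf.shape(scale), scale=scale, seed=42)
# samples has shape [2, 3], one Rayleigh draw per scale entry.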
def get_broadcast_shape(*tensors):
  """Get broadcast shape as a Python list of integers (preferred) or `Tensor`.

  Args:
    *tensors: One or more `Tensor` objects (already converted!).

  Returns:
    broadcast shape: Python list (if shapes determined statically), otherwise
      an `int32` `Tensor`.
  """
  # Try static.
  s_shape = tensors[0].shape
  for t in tensors[1:]:
    s_shape = tf.broadcast_static_shape(s_shape, t.shape)
  if s_shape.is_fully_defined():
    return s_shape.as_list()

  # Fallback on dynamic.
  d_shape = tf.shape(tensors[0])
  for t in tensors[1:]:
    d_shape = tf.broadcast_dynamic_shape(d_shape, tf.shape(t))
  return d_shape
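# Illustrative usage sketch (not from the original source); assumes the
# `get_broadcast_shape` helper above is in scope. With fully defined static
# shapes the helper returns a Python list; otherwise it falls back to the
# dynamic `tf.broadcast_dynamic_shape` path and returns an int32 Tensor.
import tensorflow as tf

a = tf.zeros([2, 1, 5])
b = tf.zeros([3, 1])
print(get_broadcast_shape(a, b))                             # ==> [2, 3, 5]
print(tf.broadcast_dynamic_shape(tf.shape(a), tf.shape(b)))  # ==> [2 3 5]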
def iou(box_1, box_2):
  # box_1 is the pred box and contains multiple class prob values, one for
  # each class.
  # box_2 is the true box and contains just one class value.
  box_1 = tf.expand_dims(box_1, -2)
  box_2 = tf.expand_dims(box_2, 0)
  new_shape = tf.broadcast_dynamic_shape(tf.shape(box_1), tf.shape(box_2))
  box_1 = tf.broadcast_to(box_1, new_shape)
  box_2 = tf.broadcast_to(box_2, new_shape)

  intersection_w = tf.maximum(
      tf.minimum(box_1[..., 2], box_2[..., 2]) -
      tf.maximum(box_1[..., 0], box_2[..., 0]), 0)
  intersection_h = tf.maximum(
      tf.minimum(box_1[..., 3], box_2[..., 3]) -
      tf.maximum(box_1[..., 1], box_2[..., 1]), 0)
  intersection_area = intersection_w * intersection_h

  box_1_area = (box_1[..., 2] - box_1[..., 0]) * (box_1[..., 3] - box_1[..., 1])
  box_2_area = (box_2[..., 2] - box_2[..., 0]) * (box_2[..., 3] - box_2[..., 1])
  return intersection_area / (box_1_area + box_2_area - intersection_area)
def broadcast_iou(box_1, box_2):
  # box_1: (..., (x1, y1, x2, y2))
  # box_2: (N, (x1, y1, x2, y2))

  # broadcast boxes
  box_1 = tf.expand_dims(box_1, -2)
  box_2 = tf.expand_dims(box_2, 0)
  # new_shape: (..., N, (x1, y1, x2, y2))
  new_shape = tf.broadcast_dynamic_shape(tf.shape(box_1), tf.shape(box_2))
  box_1 = tf.broadcast_to(box_1, new_shape)
  box_2 = tf.broadcast_to(box_2, new_shape)

  int_w = tf.maximum(
      tf.minimum(box_1[..., 2], box_2[..., 2]) -
      tf.maximum(box_1[..., 0], box_2[..., 0]), 0)
  int_h = tf.maximum(
      tf.minimum(box_1[..., 3], box_2[..., 3]) -
      tf.maximum(box_1[..., 1], box_2[..., 1]), 0)
  int_area = int_w * int_h
  box_1_area = (box_1[..., 2] - box_1[..., 0]) * \
      (box_1[..., 3] - box_1[..., 1])
  box_2_area = (box_2[..., 2] - box_2[..., 0]) * \
      (box_2[..., 3] - box_2[..., 1])
  return int_area / (box_1_area + box_2_area - int_area)
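# Illustrative usage sketch (not from the original source); assumes the
# `broadcast_iou` function above is in scope. Two predicted boxes are compared
# against three ground-truth boxes in one call, giving a [2, 3] IoU matrix.
import tensorflow as tf

# Boxes are in (x1, y1, x2, y2) format.
pred_boxes = tf.constant([[0., 0., 2., 2.],
                          [1., 1., 3., 3.]])        # shape (2, 4)
true_boxes = tf.constant([[0., 0., 2., 2.],
                          [1., 1., 2., 2.],
                          [10., 10., 12., 12.]])    # shape (3, 4)
iou_matrix = broadcast_iou(pred_boxes, true_boxes)
# iou_matrix[0, 0] == 1.0 (identical boxes); iou_matrix[0, 2] == 0.0 (disjoint).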
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(
      tf.shape(self.df),
      self.scale_operator.batch_shape_tensor())
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(
      tf.shape(input=self.peak),
      tf.broadcast_dynamic_shape(tf.shape(input=self.low),
                                 tf.shape(input=self.high)))
def _set_event_shape(shape, shape_tensor):
  if event_shape is None:
    return shape, shape_tensor
  return (tf.broadcast_static_shape(event_shape, shape),
          tf.broadcast_dynamic_shape(event_shape_tensor, shape_tensor))
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(
      tf.shape(input=self.total_count),
      tf.shape(input=self.probs))
def _batch_shape_tensor(self, low=None, high=None):
  return tf.broadcast_dynamic_shape(
      tf.shape(self.low if low is None else low),
      tf.shape(self.high if high is None else high),
  )
def __init__(self, mean, stddev=None, logstd=None, group_event_ndims=None,
             check_numerics=False, name=None, scope=None):
  # check the arguments
  if (stddev is None and logstd is None) or \
          (stddev is not None and logstd is not None):
    raise ValueError('One and only one of `stddev`, `logstd` should '
                     'be specified.')
  dtype = get_preferred_tensor_dtype(mean)
  if not dtype.is_floating:
    raise TypeError('Normal distribution parameters must be float '
                    'numbers.')

  super(Normal, self).__init__(
      group_event_ndims=group_event_ndims,
      check_numerics=check_numerics,
      name=name,
      scope=scope,
  )

  with reopen_variable_scope(self.variable_scope):
    with tf.name_scope('init'):
      # obtain parameter tensors
      mean = tf.convert_to_tensor(mean, dtype=dtype)
      if stddev is not None:
        stddev = tf.convert_to_tensor(stddev, dtype=dtype)
        self._stdx = stddev
        self._stdx_is_log = False
      else:
        logstd = tf.convert_to_tensor(logstd, dtype=dtype)
        self._stdx = logstd
        self._stdx_is_log = True

      # check the shape and data types of parameters
      self._mean = mean
      try:
        self._static_batch_shape = tf.broadcast_static_shape(
            self._mean.get_shape(),
            self._stdx.get_shape())
      except ValueError:
        raise ValueError(
            '`mean` and `stddev`/`logstd` should be '
            'broadcastable to match each other (%r vs %r).' %
            (self._mean.get_shape(), self._stdx.get_shape()))
      self._dynamic_batch_shape = tf.broadcast_dynamic_shape(
          tf.shape(self._mean), tf.shape(self._stdx))

      # derive the attributes of this Normal distribution
      if self._stdx_is_log:
        self._stddev = self._check_numerics(
            tf.exp(self._stdx, name='stddev'), 'stddev')
        self._logstd = self._stdx
        self._var = self._check_numerics(
            tf.exp(tf.constant(2., dtype=dtype) * self._logstd,
                   name='variance'),
            'variance')
        self._precision = self._check_numerics(
            tf.exp(tf.constant(-2., dtype=dtype) * self._logstd,
                   name='precision'),
            'precision')
      else:
        self._stddev = self._stdx
        self._logstd = self._check_numerics(
            tf.log(self._stdx, name='logstd'), 'logstd')
        self._var = tf.square(self._stddev, name='variance')
        self._precision = self._check_numerics(
            tf.divide(tf.constant(1., dtype=dtype), self._var,
                      name='precision'),
            'precision')
      self._logvar = tf.multiply(tf.constant(2., dtype=dtype), self._logstd,
                                 name='logvar')
      self._log_prec = tf.negative(self._logvar, name='log_precision')
def __init__(self, mean, log_scale, bin_size, min_val=None, max_val=None,
             dtype=tf.float32, biased_edges=True, discretize_given=True,
             discretize_sample=True, epsilon=1e-7):
  """
  Construct a new :class:`DiscretizedLogistic`.

  Args:
      mean: A Tensor, the `mean`.
      log_scale: A Tensor, the `log(scale)`.
      bin_size: A scalar, the `bin_size`.
      min_val: A scalar, the minimum possible value of `x`.
      max_val: A scalar, the maximum possible value of `x`.
      dtype: The data type of `x`.
      biased_edges: Whether or not to use biased density for edge values?
          See above.
      discretize_given (bool): Whether or not to discretize `given` in
          :meth:`log_prob` and :meth:`prob`?
      discretize_sample (bool): Whether or not to discretize the generated
          samples in :meth:`sample`?
      epsilon: Small float to avoid dividing by zero or taking logarithm of
          zero.
  """
  # check the arguments
  mean = tf.convert_to_tensor(mean)
  param_dtype = mean.dtype
  log_scale = tf.convert_to_tensor(log_scale)
  dtype = tf.as_dtype(dtype)

  if not is_integer_number(bin_size) and not dtype.is_floating:
    raise ValueError(
        '`bin_size` is a float number, but `dtype` is not a float '
        'number type: {}'.format(dtype))

  if (min_val is None and max_val is not None) or \
          (min_val is not None and max_val is None):
    raise ValueError('`min_val` and `max_val` must be both None or '
                     'neither None.')

  if max_val is not None and min_val is not None and \
          not is_integer_number((max_val - min_val) / bin_size):
    raise ValueError(
        '`max_val - min_val` must be multiples of `bin_size`: '
        'max_val - min_val = {} vs bin_size = {}'.format(
            max_val - min_val, bin_size))

  # infer the batch shape
  try:
    batch_static_shape = tf.broadcast_static_shape(
        mean.get_shape(), log_scale.get_shape())
  except ValueError:
    raise ValueError('The shape of `mean` and `log_scale` cannot '
                     'be broadcasted: mean {} vs log_scale {}'.format(
                         mean, log_scale))
  with tf.name_scope('DiscretizedLogistic.init'):
    batch_shape = tf.broadcast_dynamic_shape(tf.shape(mean),
                                             tf.shape(log_scale))

  # memorize the arguments and call parent constructor
  bin_size = convert_to_tensor_and_cast(bin_size, param_dtype)
  if min_val is not None:
    min_val = convert_to_tensor_and_cast(min_val, param_dtype)
  if max_val is not None:
    max_val = convert_to_tensor_and_cast(max_val, param_dtype)

  self._mean = mean
  self._log_scale = log_scale
  self._param_dtype = param_dtype
  self._bin_size = bin_size
  self._min_val = min_val
  self._max_val = max_val
  self._biased_edges = bool(biased_edges)
  self._discretize_given = bool(discretize_given)
  self._discretize_sample = bool(discretize_sample)
  self._epsilon = epsilon

  super(DiscretizedLogistic, self).__init__(
      dtype=dtype,
      is_continuous=not self._discretize_sample,
      is_reparameterized=not self._discretize_sample,
      batch_shape=batch_shape,
      batch_static_shape=batch_static_shape,
      value_ndims=0)
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(
      tf.shape(self.mean_direction)[:-1],
      tf.shape(self.concentration))
def batch_interp_regular_nd_grid(x, x_ref_min, x_ref_max, y_ref, axis,
                                 fill_value='constant_extension', name=None):
  """Multi-linear interpolation on a regular (constant spacing) grid.

  Given [a batch of] reference values, this function computes a multi-linear
  interpolant and evaluates it on [a batch of] of new `x` values.

  The interpolant is built from reference values indexed by `nd` dimensions of
  `y_ref`, starting at `axis`.

  For example, take the case of a `2-D` scalar valued function and no leading
  batch dimensions.  In this case, `y_ref.shape = [C1, C2]` and `y_ref[i, j]`
  is the reference value corresponding to grid point

  ```
  [x_ref_min[0] + i * (x_ref_max[0] - x_ref_min[0]) / (C1 - 1),
   x_ref_min[1] + j * (x_ref_max[1] - x_ref_min[1]) / (C2 - 1)]
  ```

  In the general case, dimensions to the left of `axis` in `y_ref` are
  broadcast with leading dimensions in `x`, `x_ref_min`, `x_ref_max`.

  Args:
    x: Numeric `Tensor` The x-coordinates of the interpolated output values for
      each batch.  Shape `[..., D, nd]`, designating [a batch of] `D`
      coordinates in `nd` space.  `D` must be `>= 1` and is not a batch dim.
    x_ref_min: `Tensor` of same `dtype` as `x`.  The minimum values of the
      (implicitly defined) reference `x_ref`.  Shape `[..., nd]`.
    x_ref_max: `Tensor` of same `dtype` as `x`.  The maximum values of the
      (implicitly defined) reference `x_ref`.  Shape `[..., nd]`.
    y_ref: `Tensor` of same `dtype` as `x`.  The reference output values.
      Shape `[..., C1, ..., Cnd, B1,...,BM]`, designating [a batch of]
      reference values indexed by `nd` dimensions, of a shape `[B1,...,BM]`
      valued function (for `M >= 0`).
    axis: Scalar integer `Tensor`.  Dimensions `[axis, axis + nd)` of `y_ref`
      index the interpolation table.  E.g. `3-D` interpolation of a scalar
      valued function requires `axis=-3` and a `3-D` matrix valued function
      requires `axis=-5`.
    fill_value: Determines what values output should take for `x` values that
      are below `x_ref_min` or above `x_ref_max`. Scalar `Tensor` or
      "constant_extension" ==> Extend as constant function.
      Default value: `"constant_extension"`
    name: A name to prepend to created ops.
      Default value: `"batch_interp_regular_nd_grid"`.

  Returns:
    y_interp: Interpolation between members of `y_ref`, at points `x`.
      `Tensor` of same `dtype` as `x`, and shape `[..., D, B1, ..., BM].`

  Raises:
    ValueError: If `rank(x) < 2` is determined statically.
    ValueError: If `axis` is not a scalar is determined statically.
    ValueError: If `axis + nd > rank(y_ref)` is determined statically.

  #### Examples

  Interpolate a function of one variable.

  ```python
  y_ref = tf.exp(tf.linspace(start=0., stop=10., num=20))

  tfp.math.batch_interp_regular_nd_grid(
      # x.shape = [3, 1], x_ref_min/max.shape = [1].  Trailing `1` for `1-D`.
      x=[[6.0], [0.5], [3.3]], x_ref_min=[0.], x_ref_max=[10.], y_ref=y_ref)
  ==> approx [exp(6.0), exp(0.5), exp(3.3)]
  ```

  Interpolate a scalar function of two variables.

  ```python
  x_ref_min = [0., 0.]
  x_ref_max = [2 * np.pi, 2 * np.pi]

  # Build y_ref.
  x0s, x1s = tf.meshgrid(
      tf.linspace(x_ref_min[0], x_ref_max[0], num=100),
      tf.linspace(x_ref_min[1], x_ref_max[1], num=100),
      indexing='ij')

  def func(x0, x1):
    return tf.sin(x0) * tf.cos(x1)

  y_ref = func(x0s, x1s)

  x = np.pi * tf.random_uniform(shape=(10, 2))

  tfp.math.batch_interp_regular_nd_grid(x, x_ref_min, x_ref_max, y_ref,
                                        axis=-2)
  ==> tf.sin(x[:, 0]) * tf.cos(x[:, 1])
  ```
  """
  with tf.compat.v1.name_scope(
      name,
      default_name='interp_regular_nd_grid',
      values=[x, x_ref_min, x_ref_max, y_ref, fill_value]):
    dtype = dtype_util.common_dtype([x, x_ref_min, x_ref_max, y_ref],
                                    preferred_dtype=tf.float32)

    # Arg checking.
    if isinstance(fill_value, str):
      if fill_value != 'constant_extension':
        raise ValueError(
            'A fill value ({}) was not an allowed string ({})'.format(
                fill_value, 'constant_extension'))
    else:
      fill_value = tf.convert_to_tensor(
          value=fill_value, name='fill_value', dtype=dtype)
      _assert_ndims_statically(fill_value, expect_ndims=0)

    # x.shape = [..., nd].
    x = tf.convert_to_tensor(value=x, name='x', dtype=dtype)
    _assert_ndims_statically(x, expect_ndims_at_least=2)

    # y_ref.shape = [..., C1,...,Cnd, B1,...,BM]
    y_ref = tf.convert_to_tensor(value=y_ref, name='y_ref', dtype=dtype)

    # x_ref_min.shape = [nd]
    x_ref_min = tf.convert_to_tensor(
        value=x_ref_min, name='x_ref_min', dtype=dtype)
    x_ref_max = tf.convert_to_tensor(
        value=x_ref_max, name='x_ref_max', dtype=dtype)
    _assert_ndims_statically(
        x_ref_min, expect_ndims_at_least=1, expect_static=True)
    _assert_ndims_statically(
        x_ref_max, expect_ndims_at_least=1, expect_static=True)

    # nd is the number of dimensions indexing the interpolation table, it's the
    # "nd" in the function name.
    nd = tf.compat.dimension_value(x_ref_min.shape[-1])
    if nd is None:
      raise ValueError('`x_ref_min.shape[-1]` must be known statically.')
    x_ref_max.shape[-1:].assert_is_compatible_with(x_ref_min.shape[-1:])

    # Convert axis and check it statically.
    axis = tf.convert_to_tensor(value=axis, dtype=tf.int32, name='axis')
    axis = distribution_util.make_non_negative_axis(axis, tf.rank(y_ref))
    axis.shape.assert_has_rank(0)
    axis_ = tf.get_static_value(axis)
    y_ref_rank_ = tf.get_static_value(tf.rank(y_ref))
    if axis_ is not None and y_ref_rank_ is not None:
      if axis_ + nd > y_ref_rank_:
        raise ValueError(
            'Since dims `[axis, axis + nd)` index the interpolation table, we '
            'must have `axis + nd <= rank(y_ref)`.  Found: '
            '`axis`: {},  rank(y_ref): {}, and inferred `nd` from trailing '
            'dimensions of `x_ref_min` to be {}.'.format(
                axis_, y_ref_rank_, nd))

    x_batch_shape = tf.shape(input=x)[:-2]
    x_ref_min_batch_shape = tf.shape(input=x_ref_min)[:-1]
    x_ref_max_batch_shape = tf.shape(input=x_ref_max)[:-1]
    y_ref_batch_shape = tf.shape(input=y_ref)[:axis]

    # Do a brute-force broadcast of batch dims (add zeros).
    batch_shape = y_ref_batch_shape
    for tensor in [x_batch_shape, x_ref_min_batch_shape,
                   x_ref_max_batch_shape]:
      batch_shape = tf.broadcast_dynamic_shape(batch_shape, tensor)

    def _batch_of_zeros_with_rightmost_singletons(n_singletons):
      """Return Tensor of zeros with some singletons on the rightmost dims."""
      ones = tf.ones(shape=[n_singletons], dtype=tf.int32)
      return tf.zeros(shape=tf.concat([batch_shape, ones], axis=0),
                      dtype=dtype)

    x += _batch_of_zeros_with_rightmost_singletons(n_singletons=2)
    x_ref_min += _batch_of_zeros_with_rightmost_singletons(n_singletons=1)
    x_ref_max += _batch_of_zeros_with_rightmost_singletons(n_singletons=1)
    y_ref += _batch_of_zeros_with_rightmost_singletons(
        n_singletons=tf.rank(y_ref) - axis)

    return _batch_interp_with_gather_nd(
        x=x,
        x_ref_min=x_ref_min,
        x_ref_max=x_ref_max,
        y_ref=y_ref,
        nd=nd,
        fill_value=fill_value,
        batch_dims=tf.get_static_value(tf.rank(x)) - 2)
def _batch_shape(self):
  return tf.broadcast_dynamic_shape(tf.shape(self.minval),
                                    tf.shape(self.maxval))
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(
      [] if self.amplitude is None else tf.shape(input=self.amplitude),
      [] if self.length_scale is None else tf.shape(input=self.length_scale))
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(
      self.distribution.batch_shape_tensor(),
      tf.shape(self.mixture_distribution.logits))[:-1]
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(
      tf.shape(self._loc)[:-1],
      self._radius_dist.batch_shape_tensor())
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(
      tf.shape(self.low),
      tf.shape(self.high))
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(
      [] if self.amplitude is None else tf.shape(self.amplitude),
      [] if self.length_scale is None else tf.shape(self.length_scale))
def sample_annealed_importance_chain(
    num_steps,
    proposal_log_prob_fn,
    target_log_prob_fn,
    current_state,
    make_kernel_fn,
    parallel_iterations=10,
    name=None):
  """Runs annealed importance sampling (AIS) to estimate normalizing constants.

  This function uses Hamiltonian Monte Carlo to sample from a series of
  distributions that slowly interpolates between an initial "proposal"
  distribution:

  `exp(proposal_log_prob_fn(x) - proposal_log_normalizer)`

  and the target distribution:

  `exp(target_log_prob_fn(x) - target_log_normalizer)`,

  accumulating importance weights along the way. The product of these
  importance weights gives an unbiased estimate of the ratio of the
  normalizing constants of the initial distribution and the target
  distribution:

  `E[exp(ais_weights)] = exp(target_log_normalizer - proposal_log_normalizer)`.

  Note: `proposal_log_prob_fn` and `target_log_prob_fn` are called exactly
  three times (although this may be reduced to two times, in the future).

  Args:
    num_steps: Integer number of Markov chain updates to run. More iterations
      means more expense, but smoother annealing between q and p, which in
      turn means exponentially lower variance for the normalizing constant
      estimator.
    proposal_log_prob_fn: Python callable that returns the log density of the
      initial distribution.
    target_log_prob_fn: Python callable which takes an argument like
      `current_state` (or `*current_state` if it's a list) and returns its
      (possibly unnormalized) log-density under the target distribution.
    current_state: `Tensor` or Python `list` of `Tensor`s representing the
      current state(s) of the Markov chain(s). The first `r` dimensions index
      independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`.
    make_kernel_fn: Python `callable` which returns a `TransitionKernel`-like
      object. Must take one argument representing the `TransitionKernel`'s
      `target_log_prob_fn`. The `target_log_prob_fn` argument represents the
      `TransitionKernel`'s target log distribution.  Note:
      `sample_annealed_importance_chain` creates a new `target_log_prob_fn`
      which is an interpolation between the supplied `target_log_prob_fn` and
      `proposal_log_prob_fn`; it is this interpolated function which is used
      as an argument to `make_kernel_fn`.
    parallel_iterations: The number of iterations allowed to run in parallel.
      It must be a positive integer. See `tf.while_loop` for more details.
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., "sample_annealed_importance_chain").

  Returns:
    next_state: `Tensor` or Python list of `Tensor`s representing the state(s)
      of the Markov chain(s) at the final iteration. Has same shape as input
      `current_state`.
    ais_weights: Tensor with the estimated weight(s). Has shape matching
      `target_log_prob_fn(current_state)`.
    kernel_results: `collections.namedtuple` of internal calculations used to
      advance the chain.

  #### Examples

  ##### Estimate the normalizing constant of a log-gamma distribution.

  ```python
  tfd = tfp.distributions

  # Run 100 AIS chains in parallel
  num_chains = 100
  dims = 20
  dtype = np.float32

  proposal = tfd.MultivariateNormalDiag(
      loc=tf.zeros([dims], dtype=dtype))

  target = tfd.TransformedDistribution(
      distribution=tfd.Gamma(concentration=dtype(2), rate=dtype(3)),
      bijector=tfp.bijectors.Invert(tfp.bijectors.Exp()),
      event_shape=[dims])

  chains_state, ais_weights, kernels_results = (
      tfp.mcmc.sample_annealed_importance_chain(
          num_steps=1000,
          proposal_log_prob_fn=proposal.log_prob,
          target_log_prob_fn=target.log_prob,
          current_state=proposal.sample(num_chains),
          make_kernel_fn=lambda tlp_fn: tfp.mcmc.HamiltonianMonteCarlo(
              target_log_prob_fn=tlp_fn,
              step_size=0.2,
              num_leapfrog_steps=2)))

  log_estimated_normalizer = (tf.reduce_logsumexp(ais_weights)
                              - np.log(num_chains))
  log_true_normalizer = tf.lgamma(2.) - 2. * tf.log(3.)
  ```

  ##### Estimate marginal likelihood of a Bayesian regression model.

  ```python
  tfd = tfp.distributions

  def make_prior(dims, dtype):
    return tfd.MultivariateNormalDiag(
        loc=tf.zeros(dims, dtype))

  def make_likelihood(weights, x):
    return tfd.MultivariateNormalDiag(
        loc=tf.tensordot(weights, x, axes=[[0], [-1]]))

  # Run 100 AIS chains in parallel
  num_chains = 100
  dims = 10
  dtype = np.float32

  # Make training data.
  x = np.random.randn(num_chains, dims).astype(dtype)
  true_weights = np.random.randn(dims).astype(dtype)
  y = np.dot(x, true_weights) + np.random.randn(num_chains)

  # Setup model.
  prior = make_prior(dims, dtype)
  def target_log_prob_fn(weights):
    return prior.log_prob(weights) + make_likelihood(weights, x).log_prob(y)

  proposal = tfd.MultivariateNormalDiag(
      loc=tf.zeros(dims, dtype))

  weight_samples, ais_weights, kernel_results = (
      tfp.mcmc.sample_annealed_importance_chain(
          num_steps=1000,
          proposal_log_prob_fn=proposal.log_prob,
          target_log_prob_fn=target_log_prob_fn,
          current_state=tf.zeros([num_chains, dims], dtype),
          make_kernel_fn=lambda tlp_fn: tfp.mcmc.HamiltonianMonteCarlo(
              target_log_prob_fn=tlp_fn,
              step_size=0.1,
              num_leapfrog_steps=2)))

  log_normalizer_estimate = (tf.reduce_logsumexp(ais_weights)
                             - np.log(num_chains))
  ```
  """
  with tf.name_scope(
      name, "sample_annealed_importance_chain",
      [num_steps, current_state]):
    num_steps = tf.convert_to_tensor(
        num_steps, dtype=tf.int32, name="num_steps")
    if mcmc_util.is_list_like(current_state):
      current_state = [tf.convert_to_tensor(s, name="current_state")
                       for s in current_state]
    else:
      current_state = tf.convert_to_tensor(
          current_state, name="current_state")

    def _make_convex_combined_log_prob_fn(iter_):
      def _fn(*args):
        p = tf.identity(proposal_log_prob_fn(*args), name="proposal_log_prob")
        t = tf.identity(target_log_prob_fn(*args), name="target_log_prob")
        dtype = p.dtype.base_dtype
        beta = tf.cast(iter_ + 1, dtype) / tf.cast(num_steps, dtype)
        return tf.identity(beta * t + (1. - beta) * p,
                           name="convex_combined_log_prob")
      return _fn

    def _loop_body(iter_, ais_weights, current_state, kernel_results):
      """Closure which implements `tf.while_loop` body."""
      x = (current_state if mcmc_util.is_list_like(current_state)
           else [current_state])
      proposal_log_prob = proposal_log_prob_fn(*x)
      target_log_prob = target_log_prob_fn(*x)
      ais_weights += ((target_log_prob - proposal_log_prob) /
                      tf.cast(num_steps, ais_weights.dtype))
      kernel = make_kernel_fn(_make_convex_combined_log_prob_fn(iter_))
      next_state, inner_results = kernel.one_step(
          current_state, kernel_results.inner_results)
      kernel_results = AISResults(
          proposal_log_prob=proposal_log_prob,
          target_log_prob=target_log_prob,
          inner_results=inner_results,
      )
      return [iter_ + 1, ais_weights, next_state, kernel_results]

    def _bootstrap_results(init_state):
      """Creates first version of `previous_kernel_results`."""
      kernel = make_kernel_fn(_make_convex_combined_log_prob_fn(iter_=0))
      inner_results = kernel.bootstrap_results(init_state)

      convex_combined_log_prob = inner_results.accepted_results.target_log_prob
      dtype = convex_combined_log_prob.dtype.as_numpy_dtype
      shape = tf.shape(convex_combined_log_prob)
      proposal_log_prob = tf.fill(shape, dtype(np.nan),
                                  name="bootstrap_proposal_log_prob")
      target_log_prob = tf.fill(shape, dtype(np.nan),
                                name="target_target_log_prob")

      return AISResults(
          proposal_log_prob=proposal_log_prob,
          target_log_prob=target_log_prob,
          inner_results=inner_results,
      )

    previous_kernel_results = _bootstrap_results(current_state)
    inner_results = previous_kernel_results.inner_results

    ais_weights = tf.zeros(
        shape=tf.broadcast_dynamic_shape(
            tf.shape(inner_results.proposed_results.target_log_prob),
            tf.shape(inner_results.accepted_results.target_log_prob)),
        dtype=inner_results.proposed_results.target_log_prob.dtype.base_dtype)

    [_, ais_weights, current_state, kernel_results] = tf.while_loop(
        cond=lambda iter_, *args: iter_ < num_steps,
        body=_loop_body,
        loop_vars=[
            np.int32(0),  # iter_
            ais_weights,
            current_state,
            previous_kernel_results,
        ],
        parallel_iterations=parallel_iterations)

    return [current_state, ais_weights, kernel_results]
def posterior_mode(self, observations):
  """Compute maximum likelihood sequence of hidden states.

  When this function is provided with a sequence of observations
  `x[0], ..., x[num_steps - 1]`, it returns the sequence of hidden states
  `z[0], ..., z[num_steps - 1]`, drawn from the underlying Markov chain, that
  is most likely to yield those observations.

  It uses the [Viterbi algorithm](
  https://en.wikipedia.org/wiki/Viterbi_algorithm).

  Note: the behavior of this function is undefined if the `observations`
  argument represents impossible observations from the model.

  Note: if there isn't a unique most likely sequence then one of the equally
  most likely sequences is chosen.

  Args:
    observations: A tensor representing a batch of observations made on the
      hidden Markov model.  The rightmost dimensions of this tensor correspond
      to the dimensions of the observation distributions of the underlying
      Markov chain.  The next dimension from the right indexes the steps in a
      sequence of observations from a single sample from the hidden Markov
      model.  The size of this dimension should match the `num_steps`
      parameter of the hidden Markov model object.  The other dimensions are
      the dimensions of the batch and these are broadcast with the hidden
      Markov model's parameters.

  Returns:
    A tensor representing the most likely sequence of hidden states. The
    rightmost dimension of this tensor will equal the `num_steps` parameter
    providing one hidden state for each step. The other dimensions are those
    of the batch.

  Raises:
    ValueError: if the `observations` tensor does not consist of sequences of
      `num_steps` observations.

  #### Examples

  ```python
  tfd = tfp.distributions

  # A simple weather model.

  # Represent a cold day with 0 and a hot day with 1.
  # Suppose the first day of a sequence has a 0.8 chance of being cold.
  initial_distribution = tfd.Categorical(probs=[0.8, 0.2])

  # Suppose a cold day has a 30% chance of being followed by a hot day
  # and a hot day has a 20% chance of being followed by a cold day.
  transition_distribution = tfd.Categorical(probs=[[0.7, 0.3],
                                                   [0.2, 0.8]])

  # Suppose additionally that on each day the temperature is
  # normally distributed with mean and standard deviation 0 and 5 on
  # a cold day and mean and standard deviation 15 and 10 on a hot day.
  observation_distribution = tfd.Normal(loc=[0., 15.], scale=[5., 10.])

  # This gives the hidden Markov model:
  model = tfd.HiddenMarkovModel(
      initial_distribution=initial_distribution,
      transition_distribution=transition_distribution,
      observation_distribution=observation_distribution,
      num_steps=7)

  # Suppose we observe gradually rising temperatures over a week:
  temps = [-2., 0., 2., 4., 6., 8., 10.]

  # We can now compute the most probable sequence of hidden states:
  model.posterior_mode(temps)

  # The result is [0 0 0 0 0 1 1] telling us that the transition
  # from "cold" to "hot" most likely happened between the
  # 5th and 6th days.
  ```
  """
  with tf.compat.v1.name_scope("posterior_mode", values=[observations]):
    with tf.control_dependencies(self._runtime_assertions):
      observation_tensor_shape = tf.shape(input=observations)

      with self._observation_shape_preconditions(observation_tensor_shape):
        observation_batch_shape = observation_tensor_shape[
            :-1 - self._underlying_event_rank]
        observation_event_shape = observation_tensor_shape[
            -1 - self._underlying_event_rank:]

        batch_shape = tf.broadcast_dynamic_shape(observation_batch_shape,
                                                 self.batch_shape_tensor())
        log_init = tf.broadcast_to(
            self._log_init,
            tf.concat([batch_shape, [self._num_states]], axis=0))

        observations = tf.broadcast_to(
            observations,
            tf.concat([batch_shape, observation_event_shape], axis=0))
        observation_rank = tf.rank(observations)
        underlying_event_rank = self._underlying_event_rank
        observations = util.move_dimension(
            observations, observation_rank - underlying_event_rank - 1, 0)

        # We need to compute the probability of each observation for
        # each possible state.
        # This requires inserting an extra index just before the
        # observation event indices that will be broadcast with the
        # last batch index in `observation_distribution`.
        observations = tf.expand_dims(
            observations, observation_rank - underlying_event_rank)
        observation_log_probs = self._observation_distribution.log_prob(
            observations)

        log_prob = log_init + observation_log_probs[0]

        if self._num_steps == 1:
          most_likely_end = tf.argmax(input=log_prob, axis=-1)
          return most_likely_end[..., tf.newaxis]

        def forward_step(previous_step_pair, log_prob_observation):
          log_prob_previous = previous_step_pair[0]
          log_prob = (log_prob_previous[..., tf.newaxis] +
                      self._log_trans +
                      log_prob_observation[..., tf.newaxis, :])
          most_likely_given_successor = tf.argmax(input=log_prob, axis=-2)
          max_log_p_given_successor = tf.reduce_max(input_tensor=log_prob,
                                                    axis=-2)
          return (max_log_p_given_successor, most_likely_given_successor)

        forward_log_probs, all_most_likely_given_successor = tf.scan(
            forward_step,
            observation_log_probs[1:],
            initializer=(log_prob,
                         tf.zeros(tf.shape(input=log_init), dtype=tf.int64)),
            name="forward_log_probs")

        most_likely_end = tf.argmax(input=forward_log_probs[-1], axis=-1)

        # We require the operation that gives C from A and B where
        # C[i...j] = A[i...j, B[i...j]]
        # and A = most_likely_given_successor
        #     B = most_likely_successor.
        # tf.gather requires indices of known shape so instead we use
        # reduction with tf.one_hot(B) to pick out elements from B
        def backward_step(most_likely_successor, most_likely_given_successor):
          return tf.reduce_sum(
              input_tensor=(most_likely_given_successor *
                            tf.one_hot(most_likely_successor,
                                       self._num_states,
                                       dtype=tf.int64)),
              axis=-1)

        backward_scan = tf.scan(backward_step,
                                all_most_likely_given_successor,
                                most_likely_end,
                                reverse=True)
        most_likely_sequences = tf.concat([backward_scan, [most_likely_end]],
                                          axis=0)
        return util.move_dimension(most_likely_sequences, 0, -1)
def sample_annealed_importance_chain(num_steps,
                                     proposal_log_prob_fn,
                                     target_log_prob_fn,
                                     current_state,
                                     make_kernel_fn,
                                     parallel_iterations=10,
                                     name=None):
  """Runs annealed importance sampling (AIS) to estimate normalizing constants.

  This function uses an MCMC transition operator (e.g., Hamiltonian Monte
  Carlo) to sample from a series of distributions that slowly interpolates
  between an initial "proposal" distribution:

  `exp(proposal_log_prob_fn(x) - proposal_log_normalizer)`

  and the target distribution:

  `exp(target_log_prob_fn(x) - target_log_normalizer)`,

  accumulating importance weights along the way. The product of these
  importance weights gives an unbiased estimate of the ratio of the
  normalizing constants of the initial distribution and the target
  distribution:

  `E[exp(ais_weights)] = exp(target_log_normalizer - proposal_log_normalizer)`.

  Note: When running in graph mode, `proposal_log_prob_fn` and
  `target_log_prob_fn` are called exactly three times (although this may be
  reduced to two times in the future).

  Args:
    num_steps: Integer number of Markov chain updates to run. More iterations
      means more expense, but smoother annealing between q and p, which in
      turn means exponentially lower variance for the normalizing constant
      estimator.
    proposal_log_prob_fn: Python callable that returns the log density of the
      initial distribution.
    target_log_prob_fn: Python callable which takes an argument like
      `current_state` (or `*current_state` if it's a list) and returns its
      (possibly unnormalized) log-density under the target distribution.
    current_state: `Tensor` or Python `list` of `Tensor`s representing the
      current state(s) of the Markov chain(s). The first `r` dimensions index
      independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`.
    make_kernel_fn: Python `callable` which returns a `TransitionKernel`-like
      object. Must take one argument representing the `TransitionKernel`'s
      `target_log_prob_fn`. The `target_log_prob_fn` argument represents the
      `TransitionKernel`'s target log distribution.  Note:
      `sample_annealed_importance_chain` creates a new `target_log_prob_fn`
      which is an interpolation between the supplied `target_log_prob_fn` and
      `proposal_log_prob_fn`; it is this interpolated function which is used
      as an argument to `make_kernel_fn`.
    parallel_iterations: The number of iterations allowed to run in parallel.
      It must be a positive integer. See `tf.while_loop` for more details.
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., "sample_annealed_importance_chain").

  Returns:
    next_state: `Tensor` or Python list of `Tensor`s representing the state(s)
      of the Markov chain(s) at the final iteration. Has same shape as input
      `current_state`.
    ais_weights: Tensor with the estimated weight(s). Has shape matching
      `target_log_prob_fn(current_state)`.
    kernel_results: `collections.namedtuple` of internal calculations used to
      advance the chain.

  #### Examples

  ##### Estimate the normalizing constant of a log-gamma distribution.

  ```python
  tfd = tfp.distributions

  # Run 100 AIS chains in parallel
  num_chains = 100
  dims = 20
  dtype = np.float32

  proposal = tfd.MultivariateNormalDiag(
      loc=tf.zeros([dims], dtype=dtype))

  target = tfd.TransformedDistribution(
      distribution=tfd.Gamma(concentration=dtype(2), rate=dtype(3)),
      bijector=tfp.bijectors.Invert(tfp.bijectors.Exp()),
      event_shape=[dims])

  chains_state, ais_weights, kernels_results = (
      tfp.mcmc.sample_annealed_importance_chain(
          num_steps=1000,
          proposal_log_prob_fn=proposal.log_prob,
          target_log_prob_fn=target.log_prob,
          current_state=proposal.sample(num_chains),
          make_kernel_fn=lambda tlp_fn: tfp.mcmc.HamiltonianMonteCarlo(
              target_log_prob_fn=tlp_fn,
              step_size=0.2,
              num_leapfrog_steps=2)))

  log_estimated_normalizer = (tf.reduce_logsumexp(ais_weights)
                              - np.log(num_chains))
  log_true_normalizer = tf.lgamma(2.) - 2. * tf.log(3.)
  ```

  ##### Estimate marginal likelihood of a Bayesian regression model.

  ```python
  tfd = tfp.distributions

  def make_prior(dims, dtype):
    return tfd.MultivariateNormalDiag(
        loc=tf.zeros(dims, dtype))

  def make_likelihood(weights, x):
    return tfd.MultivariateNormalDiag(
        loc=tf.tensordot(weights, x, axes=[[0], [-1]]))

  # Run 100 AIS chains in parallel
  num_chains = 100
  dims = 10
  dtype = np.float32

  # Make training data.
  x = np.random.randn(num_chains, dims).astype(dtype)
  true_weights = np.random.randn(dims).astype(dtype)
  y = np.dot(x, true_weights) + np.random.randn(num_chains)

  # Setup model.
  prior = make_prior(dims, dtype)
  def target_log_prob_fn(weights):
    return prior.log_prob(weights) + make_likelihood(weights, x).log_prob(y)

  proposal = tfd.MultivariateNormalDiag(
      loc=tf.zeros(dims, dtype))

  weight_samples, ais_weights, kernel_results = (
      tfp.mcmc.sample_annealed_importance_chain(
          num_steps=1000,
          proposal_log_prob_fn=proposal.log_prob,
          target_log_prob_fn=target_log_prob_fn,
          current_state=tf.zeros([num_chains, dims], dtype),
          make_kernel_fn=lambda tlp_fn: tfp.mcmc.HamiltonianMonteCarlo(
              target_log_prob_fn=tlp_fn,
              step_size=0.1,
              num_leapfrog_steps=2)))

  log_normalizer_estimate = (tf.reduce_logsumexp(ais_weights)
                             - np.log(num_chains))
  ```
  """
  with tf.name_scope(name, "sample_annealed_importance_chain",
                     [num_steps, current_state]):
    num_steps = tf.convert_to_tensor(num_steps, dtype=tf.int32,
                                     name="num_steps")
    if mcmc_util.is_list_like(current_state):
      current_state = [
          tf.convert_to_tensor(s, name="current_state") for s in current_state
      ]
    else:
      current_state = tf.convert_to_tensor(current_state,
                                           name="current_state")

    def _make_convex_combined_log_prob_fn(iter_):
      def _fn(*args):
        p = tf.identity(proposal_log_prob_fn(*args), name="proposal_log_prob")
        t = tf.identity(target_log_prob_fn(*args), name="target_log_prob")
        dtype = p.dtype.base_dtype
        beta = tf.cast(iter_ + 1, dtype) / tf.cast(num_steps, dtype)
        return tf.identity(beta * t + (1. - beta) * p,
                           name="convex_combined_log_prob")
      return _fn

    def _loop_body(iter_, ais_weights, current_state, kernel_results):
      """Closure which implements `tf.while_loop` body."""
      x = (current_state if mcmc_util.is_list_like(current_state)
           else [current_state])
      proposal_log_prob = proposal_log_prob_fn(*x)
      target_log_prob = target_log_prob_fn(*x)
      ais_weights += ((target_log_prob - proposal_log_prob) /
                      tf.cast(num_steps, ais_weights.dtype))
      kernel = make_kernel_fn(_make_convex_combined_log_prob_fn(iter_))
      next_state, inner_results = kernel.one_step(
          current_state, kernel_results.inner_results)
      kernel_results = AISResults(
          proposal_log_prob=proposal_log_prob,
          target_log_prob=target_log_prob,
          inner_results=inner_results,
      )
      return [iter_ + 1, ais_weights, next_state, kernel_results]

    def _bootstrap_results(init_state):
      """Creates first version of `previous_kernel_results`."""
      kernel = make_kernel_fn(_make_convex_combined_log_prob_fn(iter_=0))
      inner_results = kernel.bootstrap_results(init_state)

      convex_combined_log_prob = inner_results.accepted_results.target_log_prob
      dtype = convex_combined_log_prob.dtype.as_numpy_dtype
      shape = tf.shape(convex_combined_log_prob)
      proposal_log_prob = tf.fill(shape, dtype(np.nan),
                                  name="bootstrap_proposal_log_prob")
      target_log_prob = tf.fill(shape, dtype(np.nan),
                                name="target_target_log_prob")

      return AISResults(
          proposal_log_prob=proposal_log_prob,
          target_log_prob=target_log_prob,
          inner_results=inner_results,
      )

    previous_kernel_results = _bootstrap_results(current_state)
    inner_results = previous_kernel_results.inner_results

    ais_weights = tf.zeros(
        shape=tf.broadcast_dynamic_shape(
            tf.shape(inner_results.proposed_results.target_log_prob),
            tf.shape(inner_results.accepted_results.target_log_prob)),
        dtype=inner_results.proposed_results.target_log_prob.dtype.base_dtype)

    [_, ais_weights, current_state, kernel_results] = tf.while_loop(
        cond=lambda iter_, *args: iter_ < num_steps,
        body=_loop_body,
        loop_vars=[
            np.int32(0),  # iter_
            ais_weights,
            current_state,
            previous_kernel_results,
        ],
        parallel_iterations=parallel_iterations)

    return [current_state, ais_weights, kernel_results]
def lu_solve(lower_upper, perm, rhs, validate_args=False, name=None):
  """Solves systems of linear eqns `A X = RHS`, given LU factorizations.

  Note: this function does not verify the implied matrix is actually invertible
  nor is this condition checked even when `validate_args=True`.

  Args:
    lower_upper: `lu` as returned by `tf.linalg.lu`, i.e., if
      `matmul(P, matmul(L, U)) = X` then `lower_upper = L + U - eye`.
    perm: `p` as returned by `tf.linalg.lu`, i.e., if
      `matmul(P, matmul(L, U)) = X` then `perm = argmax(P)`.
    rhs: Matrix-shaped float `Tensor` representing targets for which to solve;
      `A X = RHS`. To handle vector cases, use:
      `lu_solve(..., rhs[..., tf.newaxis])[..., 0]`.
    validate_args: Python `bool` indicating whether arguments should be checked
      for correctness. Note: this function does not verify the implied matrix
      is actually invertible, even when `validate_args=True`.
      Default value: `False` (i.e., don't validate arguments).
    name: Python `str` name given to ops managed by this object.
      Default value: `None` (i.e., "lu_solve").

  Returns:
    x: The `X` in `A @ X = RHS`.

  #### Examples

  ```python
  import numpy as np
  import tensorflow as tf
  import tensorflow_probability as tfp

  x = [[[1., 2],
        [3, 4]],
       [[7, 8],
        [3, 4]]]
  inv_x = tfp.math.lu_solve(*tf.linalg.lu(x), rhs=tf.eye(2))
  tf.assert_near(tf.matrix_inverse(x), inv_x)
  # ==> True
  ```
  """
  with tf.name_scope(name, 'lu_solve', [lower_upper, perm, rhs]):
    lower_upper = tf.convert_to_tensor(
        value=lower_upper, dtype_hint=tf.float32, name='lower_upper')
    perm = tf.convert_to_tensor(value=perm, dtype_hint=tf.int32, name='perm')
    rhs = tf.convert_to_tensor(
        value=rhs, dtype_hint=lower_upper.dtype, name='rhs')

    assertions = _lu_solve_assertions(lower_upper, perm, rhs, validate_args)
    if assertions:
      with tf.control_dependencies(assertions):
        lower_upper = tf.identity(lower_upper)
        perm = tf.identity(perm)
        rhs = tf.identity(rhs)

    if rhs.shape.ndims == 2 and perm.shape.ndims == 1:
      # Both rhs and perm have scalar batch_shape.
      permuted_rhs = tf.gather(rhs, perm, axis=-2)
    else:
      # Either rhs or perm have non-scalar batch_shape or we can't determine
      # this information statically.
      rhs_shape = tf.shape(input=rhs)
      broadcast_batch_shape = tf.broadcast_dynamic_shape(
          rhs_shape[:-2],
          tf.shape(input=perm)[:-1])
      d, m = rhs_shape[-2], rhs_shape[-1]
      rhs_broadcast_shape = tf.concat([broadcast_batch_shape, [d, m]], axis=0)

      # Tile out rhs.
      broadcast_rhs = tf.broadcast_to(rhs, rhs_broadcast_shape)
      broadcast_rhs = tf.reshape(broadcast_rhs, [-1, d, m])

      # Tile out perm and add batch indices.
      broadcast_perm = tf.broadcast_to(perm, rhs_broadcast_shape[:-1])
      broadcast_perm = tf.reshape(broadcast_perm, [-1, d])
      broadcast_batch_size = tf.reduce_prod(input_tensor=broadcast_batch_shape)
      broadcast_batch_indices = tf.broadcast_to(
          tf.range(broadcast_batch_size)[:, tf.newaxis],
          [broadcast_batch_size, d])
      broadcast_perm = tf.stack([broadcast_batch_indices, broadcast_perm],
                                axis=-1)

      permuted_rhs = tf.gather_nd(broadcast_rhs, broadcast_perm)
      permuted_rhs = tf.reshape(permuted_rhs, rhs_broadcast_shape)

    lower = tf.linalg.set_diag(
        tf.linalg.band_part(lower_upper, num_lower=-1, num_upper=0),
        tf.ones(tf.shape(input=lower_upper)[:-1], dtype=lower_upper.dtype))
    return linear_operator_util.matrix_triangular_solve_with_broadcast(
        lower_upper,  # Only upper is accessed.
        linear_operator_util.matrix_triangular_solve_with_broadcast(
            lower, permuted_rhs),
        lower=False)
def sample_n(self, n, seed=None):
  shape = tf.concat([[n], tf.broadcast_dynamic_shape(tf.shape(self.loc),
                                                     tf.shape(self.scale))],
                    axis=0)
  sampled = tf.random.normal(
      shape=shape, mean=0., stddev=1., dtype=tf.float32, seed=seed)
  return sampled * self.scale + self.loc
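# Illustrative standalone sketch (not tied to the class above) of the same
# location-scale trick, with hypothetical `loc` and `scale` values:
# broadcasting loc [3] against scale [2, 1] gives a [2, 3] batch, so n draws
# have shape [n, 2, 3].
import tensorflow as tf

loc = tf.constant([0., 1., 2.])        # shape [3]
scale = tf.constant([[1.], [10.]])     # shape [2, 1]
n = 5
batch_shape = tf.broadcast_dynamic_shape(tf.shape(loc), tf.shape(scale))
samples = tf.random.normal(shape=tf.concat([[n], batch_shape], axis=0))
samples = samples * scale + loc        # shape [5, 2, 3]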
def _batch_shape(self):
  return tf.broadcast_dynamic_shape(tf.shape(self.loc), tf.shape(self.scale))
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(
      self.kernel.batch_shape_tensor(),
      tf.shape(input=self.scale_diag)[:-self.kernel.feature_ndims])
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(
      tf.shape(self.total_count),
      tf.shape(self.probs))
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(
      tf.shape(input=self.concentration),
      tf.shape(input=self.rate))
def _batch_shape_tensor(self):
  with self._name_scope("batch_shape_tensor"):
    return tf.broadcast_dynamic_shape(tf.shape(self.amplitude),
                                      tf.shape(self.length_scale))
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(tf.shape(input=self.loc),
                                    tf.shape(input=self.scale))
def posterior_marginals(self, observations):
  """Compute marginal posterior distribution for each state.

  This function computes, for each time step, the marginal conditional
  probability that the hidden Markov model was in each possible state given
  the observations that were made at each time step. So if the hidden states
  are `z[0],...,z[num_steps - 1]` and the observations are
  `x[0], ..., x[num_steps - 1]`, then this function computes
  `P(z[i] | x[0], ..., x[num_steps - 1])` for all `i` from `0` to
  `num_steps - 1`.

  This operation is sometimes called smoothing. It uses a form of the
  forward-backward algorithm.

  Note: the behavior of this function is undefined if the `observations`
  argument represents impossible observations from the model.

  Args:
    observations: A tensor representing a batch of observations made on the
      hidden Markov model.  The rightmost dimension of this tensor gives the
      steps in a sequence of observations from a single sample from the hidden
      Markov model.  The size of this dimension should match the `num_steps`
      parameter of the hidden Markov model object.  The other dimensions are
      the dimensions of the batch and these are broadcast with the hidden
      Markov model's parameters.

  Returns:
    A `Categorical` distribution object representing the marginal probability
    of the hidden Markov model being in each state at each step. The rightmost
    dimension of the `Categorical` distributions batch will equal the
    `num_steps` parameter providing one marginal distribution for each step.
    The other dimensions are the dimensions corresponding to the batch of
    observations.

  Raises:
    ValueError: if rightmost dimension of `observations` does not have size
      `num_steps`.
  """
  with tf.compat.v1.name_scope("posterior_marginals", values=[observations]):
    with tf.control_dependencies(self._runtime_assertions):
      observation_tensor_shape = tf.shape(input=observations)

      with self._observation_shape_preconditions(observation_tensor_shape):
        observation_batch_shape = observation_tensor_shape[
            :-1 - self._underlying_event_rank]
        observation_event_shape = observation_tensor_shape[
            -1 - self._underlying_event_rank:]

        batch_shape = tf.broadcast_dynamic_shape(observation_batch_shape,
                                                 self.batch_shape_tensor())
        log_init = tf.broadcast_to(
            self._log_init,
            tf.concat([batch_shape, [self._num_states]], axis=0))
        log_transition = self._log_trans

        observations = tf.broadcast_to(
            observations,
            tf.concat([batch_shape, observation_event_shape], axis=0))
        observation_rank = tf.rank(observations)
        underlying_event_rank = self._underlying_event_rank
        observations = util.move_dimension(
            observations, observation_rank - underlying_event_rank - 1, 0)
        observations = tf.expand_dims(
            observations, observation_rank - underlying_event_rank)
        observation_log_probs = self._observation_distribution.log_prob(
            observations)

        log_adjoint_prob = tf.zeros_like(log_init)

        def forward_step(log_previous_step, log_prob_observation):
          return _log_vector_matrix(log_previous_step,
                                    log_transition) + log_prob_observation

        log_prob = log_init + observation_log_probs[0]

        forward_log_probs = tf.scan(forward_step,
                                    observation_log_probs[1:],
                                    initializer=log_prob,
                                    name="forward_log_probs")
        forward_log_probs = tf.concat([[log_prob], forward_log_probs], axis=0)

        def backward_step(log_previous_step, log_prob_observation):
          return _log_matrix_vector(log_transition,
                                    log_prob_observation + log_previous_step)

        backward_log_adjoint_probs = tf.scan(
            backward_step,
            observation_log_probs[1:],
            initializer=log_adjoint_prob,
            reverse=True,
            name="backward_log_adjoint_probs")

        total_log_prob = tf.reduce_logsumexp(
            input_tensor=forward_log_probs[-1], axis=-1)

        backward_log_adjoint_probs = tf.concat(
            [backward_log_adjoint_probs, [log_adjoint_prob]], axis=0)

        log_likelihoods = forward_log_probs + backward_log_adjoint_probs

        marginal_log_probs = util.move_dimension(
            log_likelihoods - total_log_prob[..., tf.newaxis], 0, -2)

        return categorical.Categorical(logits=marginal_log_probs)
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(tf.shape(self.loc),
                                    tf.shape(self.concentration))
def _dynamic_broadcast_shape_from_tensors(*tensors):
  shape = tf.shape(tensors[0])
  for t in tensors[1:]:
    shape = tf.broadcast_dynamic_shape(shape, tf.shape(t))
  return shape
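# Illustrative usage sketch (not from the original source); assumes the
# `_dynamic_broadcast_shape_from_tensors` helper above is in scope. The result
# is the shape `a + b + c` would have under NumPy-style broadcasting.
import tensorflow as tf

a = tf.zeros([2, 1, 3])
b = tf.zeros([4, 1])
c = tf.zeros([3])
print(_dynamic_broadcast_shape_from_tensors(a, b, c))  # ==> [2 4 3]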
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(tf.shape(self.loc), tf.shape(self.scale))
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(
      tf.shape(self.concentration),
      tf.shape(self.rate))
def _batch_shape(self):
  return tf.broadcast_dynamic_shape(tf.shape(self.mean), tf.shape(self.std))
def _batch_shape_tensor(self):
  return tf.broadcast_dynamic_shape(tf.shape(input=self.temperature),
                                    tf.shape(input=self.logits)[:-1])
def _batch_shape(self):
  return tf.broadcast_dynamic_shape(tf.shape(self.alpha), tf.shape(self.beta))