def dense_to_sparse(dense_input):
  """Converts a dense integer tensor to a `tf.SparseTensor` of its nonzeros."""
  with tf.control_dependencies([tf.assert_integer(dense_input)]):
    idx = tf.where(tf.not_equal(dense_input, 0))
    vals = tf.gather_nd(dense_input, idx)
    shape = tf.cast(tf.shape(dense_input), tf.int64)
    sparse = tf.SparseTensor(idx, vals, shape)
    return sparse

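A minimal usage sketch for the `dense_to_sparse` helper above, assuming TF 1.x graph mode; the session code and sample values are illustrative only.

import tensorflow as tf

dense = tf.constant([[1, 0, 2],
                     [0, 0, 3]], dtype=tf.int32)
sparse = dense_to_sparse(dense)  # helper defined above

with tf.Session() as sess:
  result = sess.run(sparse)
  print(result.indices)      # [[0 0] [0 2] [1 2]]
  print(result.values)       # [1 2 3]
  print(result.dense_shape)  # [2 3]
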
def test_raises_when_float(self):
  with self.test_session():
    floats = tf.constant([1.0, 2.0], name="floats")
    with tf.control_dependencies([tf.assert_integer(floats)]):
      out = tf.identity(floats)
    with self.assertRaisesOpError("x is not of integer dtype.*"):
      out.eval()

def percentile(x,
               q,
               axis=None,
               interpolation=None,
               keep_dims=False,
               validate_args=False,
               name=None):
  """Compute the `q`-th percentile(s) of `x`.

  Given a vector `x`, the `q`-th percentile of `x` is the value `q / 100` of
  the way from the minimum to the maximum in a sorted copy of `x`.

  The values and distances of the two nearest neighbors as well as the
  `interpolation` parameter will determine the percentile if the normalized
  ranking does not match the location of `q` exactly.

  This function is the same as the median if `q = 50`, the same as the minimum
  if `q = 0` and the same as the maximum if `q = 100`.

  Multiple percentiles can be computed at once by using `1-D` vector `q`.
  Dimension zero of the returned `Tensor` will index the different percentiles.

  ```python
  # Get 30th percentile with default ('nearest') interpolation.
  x = [1., 2., 3., 4.]
  percentile(x, q=30.)
  ==> 2.0

  # Get 30th and 70th percentiles with 'lower' interpolation
  x = [1., 2., 3., 4.]
  percentile(x, q=[30., 70.], interpolation='lower')
  ==> [1., 3.]

  # Get 100th percentile (maximum). By default, this is computed over every dim
  x = [[1., 2.]
       [3., 4.]]
  percentile(x, q=100.)
  ==> 4.

  # Treat the leading dim as indexing samples, and find the 100th quantile
  # (max) over all such samples.
  x = [[1., 2.]
       [3., 4.]]
  percentile(x, q=100., axis=[0])
  ==> [3., 4.]
  ```

  Compare to `numpy.percentile`.

  Args:
    x:  Floating point `N-D` `Tensor` with `N > 0`.  If `axis` is not `None`,
      `x` must have statically known number of dimensions.
    q:  Scalar or vector `Tensor` with values in `[0, 100]`. The percentile(s).
    axis:  Optional `0-D` or `1-D` integer `Tensor` with constant values.  The
      axis that holds independent samples over which to return the desired
      percentile.  If `None` (the default), treat every dimension as a sample
      dimension, returning a scalar.
    interpolation:  {'lower', 'higher', 'nearest'}.  Default: 'nearest'.  This
      optional parameter specifies the interpolation method to use when the
      desired quantile lies between two data points `i < j`:
        * lower: `i`.
        * higher: `j`.
        * nearest: `i` or `j`, whichever is nearest.
    keep_dims:  Python `bool`.  If `True`, the last dimension is kept with
      size 1.  If `False`, the last dimension is removed from the output
      shape.
    validate_args:  Whether to add runtime checks of argument validity.  If
      False, and arguments are incorrect, correct behavior is not guaranteed.
    name:  A Python string name to give this `Op`.  Default is 'percentile'.

  Returns:
    A `(rank(q) + N - len(axis))` dimensional `Tensor` of same dtype as `x`,
    or, if `axis` is `None`, a `rank(q)` `Tensor`.  The first `rank(q)`
    dimensions index quantiles for different values of `q`.

  Raises:
    ValueError:  If argument 'interpolation' is not an allowed type.
  """
  name = name or 'percentile'
  allowed_interpolations = {'lower', 'higher', 'nearest'}

  if interpolation is None:
    interpolation = 'nearest'
  else:
    if interpolation not in allowed_interpolations:
      raise ValueError(
          'Argument `interpolation` must be in %s. Found %s' %
          (allowed_interpolations, interpolation))

  with tf.name_scope(name, values=[x, q]):
    x = tf.convert_to_tensor(x, name='x')
    # Double is needed here and below, else we get the wrong index if the
    # array is huge along axis.
    q = tf.to_double(q, name='q')
    _get_static_ndims(q, expect_ndims_no_more_than=1)

    if validate_args:
      q = control_flow_ops.with_dependencies([
          tf.assert_rank_in(q, [0, 1]),
          tf.assert_greater_equal(q, tf.to_double(0.)),
          tf.assert_less_equal(q, tf.to_double(100.))
      ], q)

    if axis is None:
      y = tf.reshape(x, [-1])
    else:
      axis = tf.convert_to_tensor(axis, name='axis')
      tf.assert_integer(axis)
      axis_ndims = _get_static_ndims(
          axis, expect_static=True, expect_ndims_no_more_than=1)
      axis_const = tensor_util.constant_value(axis)
      if axis_const is None:
        raise ValueError(
            'Expected argument `axis` to be statically available. Found: %s' %
            axis)
      axis = axis_const
      if axis_ndims == 0:
        axis = [axis]
      axis = [int(a) for a in axis]
      x_ndims = _get_static_ndims(
          x, expect_static=True, expect_ndims_at_least=1)
      axis = _make_static_axis_non_negative(axis, x_ndims)
      # Move dims in axis to the end, since _sort_tensor, which calls top_k,
      # only sorts the last dim.
      y = _move_dims_to_flat_end(x, axis, x_ndims)

    frac_at_q_or_above = 1. - q / 100.
    d = tf.to_double(tf.shape(y)[-1])

    if interpolation == 'lower':
      indices = tf.ceil((d - 1) * frac_at_q_or_above)
    elif interpolation == 'higher':
      indices = tf.floor((d - 1) * frac_at_q_or_above)
    elif interpolation == 'nearest':
      indices = tf.round((d - 1) * frac_at_q_or_above)

    # If d is gigantic, then we would have d == d - 1, even in double... So
    # let's use max/min to avoid out of bounds errors.
    d = tf.shape(y)[-1]
    # d - 1 will be distinct from d in int32.
    indices = tf.clip_by_value(tf.to_int32(indices), 0, d - 1)

    # Sort everything, not just the top 'k' entries, which allows multiple
    # calls to sort only once (under the hood) and use CSE.
    sorted_y = _sort_tensor(y)

    # Gather the indices along the sorted (last) dimension.
    # If q is a vector, the last dim of gathered_y indexes different q[i].
    gathered_y = tf.gather(sorted_y, indices, axis=-1)

    if keep_dims:
      if axis is None:
        ones_vec = tf.ones(
            shape=[_get_best_effort_ndims(x) + _get_best_effort_ndims(q)],
            dtype=tf.int32)
        gathered_y *= tf.ones(ones_vec, dtype=x.dtype)
      else:
        gathered_y = _insert_back_keep_dims(gathered_y, axis)

    # If q is a scalar, then result has the right shape.
    # If q is a vector, then result has trailing dim of shape q.shape, which
    # needs to be rotated to dim 0.
    return util.rotate_transpose(gathered_y, tf.rank(q))

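A hedged sanity check for the vector-`q` version above: its 'lower' interpolation should agree with `numpy.percentile`. This assumes `percentile` and its private helpers are in scope as defined above (this function shipped as `tf.contrib.distributions.percentile` in TF 1.x; adjust the reference if your copy lives elsewhere).

import numpy as np
import tensorflow as tf

x_np = np.random.rand(100).astype(np.float64)
pct = percentile(x_np, q=[30., 70.], interpolation='lower')

with tf.Session() as sess:
  tf_result = sess.run(pct)

np_result = np.percentile(x_np, [30., 70.], interpolation='lower')
print(np.allclose(tf_result, np_result))  # expected: True
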
def test_doesnt_raise_when_integer(self):
  with self.test_session():
    integers = tf.constant([1, 2], name="integers")
    with tf.control_dependencies([tf.assert_integer(integers)]):
      out = tf.identity(integers)
    out.eval()

def __init__(self,
             learning_rate,
             preconditioner_decay_rate=0.95,
             num_pseudo_batches=1,
             burnin=25,
             diagonal_bias=1e-8,
             name=None,
             variable_scope=None):
  default_name = 'StochasticGradientLangevinDynamics'
  with tf.name_scope(name, default_name, [
      learning_rate, preconditioner_decay_rate, num_pseudo_batches, burnin,
      diagonal_bias
  ]):
    if variable_scope is None:
      var_scope_name = tf.get_default_graph().unique_name(
          name or default_name)
      with tf.variable_scope(var_scope_name) as scope:
        self._variable_scope = scope
    else:
      self._variable_scope = variable_scope

    self._preconditioner_decay_rate = tf.convert_to_tensor(
        preconditioner_decay_rate, name='preconditioner_decay_rate')
    self._num_pseudo_batches = tf.convert_to_tensor(
        num_pseudo_batches, name='num_pseudo_batches')
    self._burnin = tf.convert_to_tensor(burnin, name='burnin')
    self._diagonal_bias = tf.convert_to_tensor(
        diagonal_bias, name='diagonal_bias')
    self._learning_rate = tf.convert_to_tensor(
        learning_rate, name='learning_rate')

    with tf.variable_scope(self._variable_scope):
      self._counter = tf.get_variable(
          'counter', initializer=0, trainable=False)

    self._preconditioner_decay_rate = control_flow_ops.with_dependencies([
        tf.assert_non_negative(
            self._preconditioner_decay_rate,
            message='`preconditioner_decay_rate` must be non-negative'),
        tf.assert_less_equal(
            self._preconditioner_decay_rate,
            1.,
            message='`preconditioner_decay_rate` must be at most 1.'),
    ], self._preconditioner_decay_rate)

    self._num_pseudo_batches = control_flow_ops.with_dependencies([
        tf.assert_greater(
            self._num_pseudo_batches,
            0,
            message='`num_pseudo_batches` must be greater than zero')
    ], self._num_pseudo_batches)

    self._burnin = control_flow_ops.with_dependencies([
        tf.assert_non_negative(
            self._burnin, message='`burnin` must be non-negative'),
        tf.assert_integer(
            self._burnin, message='`burnin` must be an integer')
    ], self._burnin)

    self._diagonal_bias = control_flow_ops.with_dependencies([
        tf.assert_non_negative(
            self._diagonal_bias,
            message='`diagonal_bias` must be non-negative')
    ], self._diagonal_bias)

    super(StochasticGradientLangevinDynamics, self).__init__(
        use_locking=False, name=name or default_name)

def test_raises_when_float(self):
  with self.test_session():
    floats = tf.constant([1.0, 2.0], name="floats")
    with self.assertRaisesRegexp(TypeError, "Expected.*integer"):
      tf.assert_integer(floats)

def __init__(self,
             batch_size,
             total_num_examples,
             max_learning_rate=1.,
             preconditioner_decay_rate=0.95,
             burnin=25,
             burnin_max_learning_rate=1e-6,
             use_single_learning_rate=False,
             name=None,
             variable_scope=None):
  default_name = 'VariationalSGD'
  with tf.name_scope(name, default_name, [
      max_learning_rate, preconditioner_decay_rate, batch_size, burnin,
      burnin_max_learning_rate
  ]):
    if variable_scope is None:
      var_scope_name = tf.get_default_graph().unique_name(
          name or default_name)
      with tf.variable_scope(var_scope_name) as scope:
        self._variable_scope = scope
    else:
      self._variable_scope = variable_scope

    self._preconditioner_decay_rate = tf.convert_to_tensor(
        preconditioner_decay_rate, name='preconditioner_decay_rate')
    self._batch_size = tf.convert_to_tensor(batch_size, name='batch_size')
    self._total_num_examples = tf.convert_to_tensor(
        total_num_examples, name='total_num_examples')
    self._burnin = tf.convert_to_tensor(burnin, name='burnin')
    self._burnin_max_learning_rate = tf.convert_to_tensor(
        burnin_max_learning_rate, name='burnin_max_learning_rate')
    self._max_learning_rate = tf.convert_to_tensor(
        max_learning_rate, name='max_learning_rate')
    self._use_single_learning_rate = use_single_learning_rate

    with tf.variable_scope(self._variable_scope):
      self._counter = tf.get_variable(
          'counter', initializer=0, trainable=False)

    self._preconditioner_decay_rate = control_flow_ops.with_dependencies([
        tf.assert_non_negative(
            self._preconditioner_decay_rate,
            message='`preconditioner_decay_rate` must be non-negative'),
        tf.assert_less_equal(
            self._preconditioner_decay_rate,
            1.,
            message='`preconditioner_decay_rate` must be at most 1.'),
    ], self._preconditioner_decay_rate)

    self._batch_size = control_flow_ops.with_dependencies([
        tf.assert_greater(
            self._batch_size,
            0,
            message='`batch_size` must be greater than zero')
    ], self._batch_size)

    self._total_num_examples = control_flow_ops.with_dependencies([
        tf.assert_greater(
            self._total_num_examples,
            0,
            message='`total_num_examples` must be greater than zero')
    ], self._total_num_examples)

    self._burnin = control_flow_ops.with_dependencies([
        tf.assert_non_negative(
            self._burnin, message='`burnin` must be non-negative'),
        tf.assert_integer(
            self._burnin, message='`burnin` must be an integer')
    ], self._burnin)

    self._burnin_max_learning_rate = control_flow_ops.with_dependencies([
        tf.assert_non_negative(
            self._burnin_max_learning_rate,
            message='`burnin_max_learning_rate` must be non-negative')
    ], self._burnin_max_learning_rate)

    self._max_learning_rate = control_flow_ops.with_dependencies([
        tf.assert_non_negative(
            self._max_learning_rate,
            message='`max_learning_rate` must be non-negative')
    ], self._max_learning_rate)

    super(VariationalSGD, self).__init__(
        use_locking=False, name=name or default_name)

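A sketch of constructing the `VariationalSGD` optimizer above on a toy quadratic loss, assuming TF 1.x graph mode and that the class is exported (e.g. as `tfp.optimizer.VariationalSGD` in TensorFlow Probability); the hyperparameter values are illustrative.

import tensorflow as tf
import tensorflow_probability as tfp

w = tf.get_variable('w', initializer=tf.constant([1., 2.]))
loss = tf.reduce_sum(tf.square(w))

opt = tfp.optimizer.VariationalSGD(
    batch_size=32,
    total_num_examples=10000,
    max_learning_rate=0.1,
    burnin=100)
train_op = opt.minimize(loss)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  for _ in range(5):
    sess.run(train_op)
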
def percentile(x,
               q,
               axis=None,
               interpolation=None,
               keep_dims=False,
               validate_args=False,
               name=None):
  """Compute the `q`-th percentile of `x`.

  Given a vector `x`, the `q`-th percentile of `x` is the value `q / 100` of
  the way from the minimum to the maximum in a sorted copy of `x`.

  The values and distances of the two nearest neighbors as well as the
  `interpolation` parameter will determine the percentile if the normalized
  ranking does not match the location of `q` exactly.

  This function is the same as the median if `q = 50`, the same as the minimum
  if `q = 0` and the same as the maximum if `q = 100`.

  ```python
  # Get 30th percentile with default ('nearest') interpolation.
  x = [1., 2., 3., 4.]
  percentile(x, q=30.)
  ==> 2.0

  # Get 30th percentile with 'lower' interpolation
  x = [1., 2., 3., 4.]
  percentile(x, q=30., interpolation='lower')
  ==> 1.0

  # Get 100th percentile (maximum). By default, this is computed over every dim
  x = [[1., 2.]
       [3., 4.]]
  percentile(x, q=100.)
  ==> 4.0

  # Treat the leading dim as indexing samples, and find the 100th quantile
  # (max) over all such samples.
  x = [[1., 2.]
       [3., 4.]]
  percentile(x, q=100., axis=[0])
  ==> [3., 4.]
  ```

  Compare to `numpy.percentile`.

  Args:
    x:  Floating point `N-D` `Tensor` with `N > 0`.  If `axis` is not `None`,
      `x` must have statically known number of dimensions.
    q:  Scalar `Tensor` in `[0, 100]`.  The percentile.
    axis:  Optional `0-D` or `1-D` integer `Tensor` with constant values.  The
      axis that holds independent samples over which to return the desired
      percentile.  If `None` (the default), treat every dimension as a sample
      dimension, returning a scalar.
    interpolation:  {"lower", "higher", "nearest"}.  Default: "nearest".  This
      optional parameter specifies the interpolation method to use when the
      desired quantile lies between two data points `i < j`:
        * lower: `i`.
        * higher: `j`.
        * nearest: `i` or `j`, whichever is nearest.
    keep_dims:  Python `bool`.  If `True`, the last dimension is kept with
      size 1.  If `False`, the last dimension is removed from the output
      shape.
    validate_args:  Whether to add runtime checks of argument validity.  If
      False, and arguments are incorrect, correct behavior is not guaranteed.
    name:  A Python string name to give this `Op`.  Default is "percentile".

  Returns:
    A `(N - len(axis))` dimensional `Tensor` of same dtype as `x`, or, if
    `axis` is `None`, a scalar.

  Raises:
    ValueError:  If argument 'interpolation' is not an allowed type.
  """
  name = name or "percentile"
  allowed_interpolations = {"lower", "higher", "nearest"}

  if interpolation is None:
    interpolation = "nearest"
  else:
    if interpolation not in allowed_interpolations:
      raise ValueError("Argument 'interpolation' must be in %s. Found %s" %
                       (allowed_interpolations, interpolation))

  with tf.name_scope(name, [x, q]):
    x = tf.convert_to_tensor(x, name="x")
    # Double is needed here and below, else we get the wrong index if the
    # array is huge along axis.
    q = tf.to_double(q, name="q")
    _get_static_ndims(q, expect_ndims=0)

    if validate_args:
      q = control_flow_ops.with_dependencies([
          tf.assert_rank(q, 0),
          tf.assert_greater_equal(q, tf.to_double(0.)),
          tf.assert_less_equal(q, tf.to_double(100.))
      ], q)

    if axis is None:
      y = tf.reshape(x, [-1])
    else:
      axis = tf.convert_to_tensor(axis, name="axis")
      tf.assert_integer(axis)
      axis_ndims = _get_static_ndims(
          axis, expect_static=True, expect_ndims_no_more_than=1)
      axis_const = tensor_util.constant_value(axis)
      if axis_const is None:
        raise ValueError(
            "Expected argument 'axis' to be statically available. Found: %s" %
            axis)
      axis = axis_const
      if axis_ndims == 0:
        axis = [axis]
      axis = [int(a) for a in axis]
      x_ndims = _get_static_ndims(
          x, expect_static=True, expect_ndims_at_least=1)
      axis = _make_static_axis_non_negative(axis, x_ndims)
      y = _move_dims_to_flat_end(x, axis, x_ndims)

    frac_at_q_or_above = 1. - q / 100.
    d = tf.to_double(tf.shape(y)[-1])

    if interpolation == "lower":
      index = tf.ceil((d - 1) * frac_at_q_or_above)
    elif interpolation == "higher":
      index = tf.floor((d - 1) * frac_at_q_or_above)
    elif interpolation == "nearest":
      index = tf.round((d - 1) * frac_at_q_or_above)

    # If d is gigantic, then we would have d == d - 1, even in double... So
    # let's use max/min to avoid out of bounds errors.
    d = tf.shape(y)[-1]
    # d - 1 will be distinct from d in int32.
    index = tf.clip_by_value(tf.to_int32(index), 0, d - 1)

    # Sort everything, not just the top 'k' entries, which allows multiple
    # calls to sort only once (under the hood) and use CSE.
    sorted_y = _sort_tensor(y)

    # result.shape = B
    result = sorted_y[..., index]
    result.set_shape(y.get_shape()[:-1])

    if keep_dims:
      if axis is None:
        # ones_vec = [1, 1,..., 1], total length = len(S) + len(B).
        ones_vec = tf.ones(
            shape=[_get_best_effort_ndims(x)], dtype=tf.int32)
        result *= tf.ones(ones_vec, dtype=x.dtype)
      else:
        result = _insert_back_keep_dims(result, axis)

    return result

def __init__(self,
             learning_rate,
             preconditioner_decay_rate=0.95,
             data_size=1,
             burnin=25,
             diagonal_bias=1e-8,
             name=None,
             parallel_iterations=10,
             variable_scope=None):
  default_name = 'StochasticGradientLangevinDynamics'
  with tf.name_scope(name, default_name, [
      learning_rate, preconditioner_decay_rate, data_size, burnin,
      diagonal_bias
  ]):
    if tf.executing_eagerly():
      raise NotImplementedError(
          'Eager execution currently not supported for SGLD optimizer.')

    if variable_scope is None:
      var_scope_name = tf.get_default_graph().unique_name(
          name or default_name)
      with tf.variable_scope(var_scope_name) as scope:
        self._variable_scope = scope
    else:
      self._variable_scope = variable_scope

    self._preconditioner_decay_rate = tf.convert_to_tensor(
        preconditioner_decay_rate, name='preconditioner_decay_rate')
    self._data_size = tf.convert_to_tensor(data_size, name='data_size')
    self._burnin = tf.convert_to_tensor(burnin, name='burnin')
    self._diagonal_bias = tf.convert_to_tensor(
        diagonal_bias, name='diagonal_bias')
    self._learning_rate = tf.convert_to_tensor(
        learning_rate, name='learning_rate')
    self._parallel_iterations = parallel_iterations

    with tf.variable_scope(self._variable_scope):
      self._counter = tf.get_variable(
          'counter', initializer=0, trainable=False)

    self._preconditioner_decay_rate = control_flow_ops.with_dependencies([
        tf.assert_non_negative(
            self._preconditioner_decay_rate,
            message='`preconditioner_decay_rate` must be non-negative'),
        tf.assert_less_equal(
            self._preconditioner_decay_rate,
            1.,
            message='`preconditioner_decay_rate` must be at most 1.'),
    ], self._preconditioner_decay_rate)

    self._data_size = control_flow_ops.with_dependencies([
        tf.assert_greater(
            self._data_size,
            0,
            message='`data_size` must be greater than zero')
    ], self._data_size)

    self._burnin = control_flow_ops.with_dependencies([
        tf.assert_non_negative(
            self._burnin, message='`burnin` must be non-negative'),
        tf.assert_integer(
            self._burnin, message='`burnin` must be an integer')
    ], self._burnin)

    self._diagonal_bias = control_flow_ops.with_dependencies([
        tf.assert_non_negative(
            self._diagonal_bias,
            message='`diagonal_bias` must be non-negative')
    ], self._diagonal_bias)

    super(StochasticGradientLangevinDynamics, self).__init__(
        use_locking=False, name=name or default_name)

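Similarly, a sketch for the SGLD constructor above (the `data_size` variant), assuming TF 1.x graph mode and that the class is exported (e.g. as `tfp.optimizer.StochasticGradientLangevinDynamics` in TensorFlow Probability); the values are illustrative, with `data_size` rescaling minibatch gradients to the full dataset.

import tensorflow as tf
import tensorflow_probability as tfp

w = tf.get_variable('w_sgld', initializer=tf.constant([1., 2.]))
loss = tf.reduce_sum(tf.square(w))

opt = tfp.optimizer.StochasticGradientLangevinDynamics(
    learning_rate=0.01,
    data_size=10000,
    burnin=25)
train_op = opt.minimize(loss)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  for _ in range(5):
    sess.run(train_op)
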
def percentile(x,
               q,
               axis=None,
               interpolation=None,
               keep_dims=False,
               validate_args=False,
               preserve_gradients=True,
               name=None):
  """Compute the `q`-th percentile(s) of `x`.

  Given a vector `x`, the `q`-th percentile of `x` is the value `q / 100` of
  the way from the minimum to the maximum in a sorted copy of `x`.

  The values and distances of the two nearest neighbors as well as the
  `interpolation` parameter will determine the percentile if the normalized
  ranking does not match the location of `q` exactly.

  This function is the same as the median if `q = 50`, the same as the minimum
  if `q = 0` and the same as the maximum if `q = 100`.

  Multiple percentiles can be computed at once by using `1-D` vector `q`.
  Dimension zero of the returned `Tensor` will index the different percentiles.

  ```python
  # Get 30th percentile with default ('nearest') interpolation.
  x = [1., 2., 3., 4.]
  tfp.stats.percentile(x, q=30.)
  ==> 2.0

  # Get 30th percentile with 'linear' interpolation.
  x = [1., 2., 3., 4.]
  tfp.stats.percentile(x, q=30., interpolation='linear')
  ==> 1.9

  # Get 30th and 70th percentiles with 'lower' interpolation
  x = [1., 2., 3., 4.]
  tfp.stats.percentile(x, q=[30., 70.], interpolation='lower')
  ==> [1., 3.]

  # Get 100th percentile (maximum). By default, this is computed over every dim
  x = [[1., 2.]
       [3., 4.]]
  tfp.stats.percentile(x, q=100.)
  ==> 4.

  # Treat the leading dim as indexing samples, and find the 100th quantile
  # (max) over all such samples.
  x = [[1., 2.]
       [3., 4.]]
  tfp.stats.percentile(x, q=100., axis=[0])
  ==> [3., 4.]
  ```

  Compare to `numpy.percentile`.

  Args:
    x:  Floating point `N-D` `Tensor` with `N > 0`.  If `axis` is not `None`,
      `x` must have statically known number of dimensions.
    q:  Scalar or vector `Tensor` with values in `[0, 100]`. The percentile(s).
    axis:  Optional `0-D` or `1-D` integer `Tensor` with constant values.  The
      axis that holds independent samples over which to return the desired
      percentile.  If `None` (the default), treat every dimension as a sample
      dimension, returning a scalar.
    interpolation:  {'nearest', 'linear', 'lower', 'higher', 'midpoint'}.
      Default value: 'nearest'.  This specifies the interpolation method to
      use when the desired quantile lies between two data points `i < j`:
        * linear: `i + (j - i) * fraction`, where `fraction` is the fractional
          part of the index surrounded by `i` and `j`.
        * lower: `i`.
        * higher: `j`.
        * nearest: `i` or `j`, whichever is nearest.
        * midpoint: `(i + j) / 2`.
      `linear` and `midpoint` interpolation do not work with integer dtypes.
    keep_dims:  Python `bool`.  If `True`, the last dimension is kept with
      size 1.  If `False`, the last dimension is removed from the output
      shape.
    validate_args:  Whether to add runtime checks of argument validity.  If
      False, and arguments are incorrect, correct behavior is not guaranteed.
    preserve_gradients:  Python `bool`.  If `True`, ensure that the gradient
      w.r.t the percentile `q` is preserved in the case of linear
      interpolation.  If `False`, the gradient will be (incorrectly) zero
      when `q` corresponds to a point in `x`.
    name:  A Python string name to give this `Op`.  Default is 'percentile'.

  Returns:
    A `(rank(q) + N - len(axis))` dimensional `Tensor` of same dtype as `x`,
    or, if `axis` is `None`, a `rank(q)` `Tensor`.  The first `rank(q)`
    dimensions index quantiles for different values of `q`.

  Raises:
    ValueError:  If argument 'interpolation' is not an allowed type.
    ValueError:  If interpolation type not compatible with `dtype`.
  """
  name = name or 'percentile'
  allowed_interpolations = {'linear', 'lower', 'higher', 'nearest', 'midpoint'}

  if interpolation is None:
    interpolation = 'nearest'
  else:
    if interpolation not in allowed_interpolations:
      raise ValueError(
          'Argument `interpolation` must be in %s. Found %s' %
          (allowed_interpolations, interpolation))

  with tf.name_scope(name, values=[x, q]):
    x = tf.convert_to_tensor(x, name='x')

    if interpolation in {'linear', 'midpoint'} and x.dtype.is_integer:
      raise TypeError('{} interpolation not allowed with dtype {}'.format(
          interpolation, x.dtype))

    # Double is needed here and below, else we get the wrong index if the
    # array is huge along axis.
    q = tf.cast(q, tf.float64)
    _get_static_ndims(q, expect_ndims_no_more_than=1)

    if validate_args:
      q = control_flow_ops.with_dependencies([
          tf.assert_rank_in(q, [0, 1]),
          tf.assert_greater_equal(q, tf.cast(0., tf.float64)),
          tf.assert_less_equal(q, tf.cast(100., tf.float64))
      ], q)

    if axis is None:
      y = tf.reshape(x, [-1])
    else:
      axis = tf.convert_to_tensor(axis, name='axis', dtype=tf.int32)
      tf.assert_integer(axis)
      axis_ndims = _get_static_ndims(
          axis, expect_static=True, expect_ndims_no_more_than=1)
      axis_const = tf.contrib.util.constant_value(axis)
      if axis_const is None:
        raise ValueError(
            'Expected argument `axis` to be statically available. Found: %s' %
            axis)
      axis = axis_const
      if axis_ndims == 0:
        axis = [axis]
      axis = [int(a) for a in axis]
      x_ndims = _get_static_ndims(
          x, expect_static=True, expect_ndims_at_least=1)
      axis = _make_static_axis_non_negative(axis, x_ndims)
      # Move dims in axis to the end, since _sort_tensor, which calls top_k,
      # only sorts the last dim.
      y = _move_dims_to_flat_end(x, axis, x_ndims)

    frac_at_q_or_above = 1. - q / 100.

    # Sort everything, not just the top 'k' entries, which allows multiple
    # calls to sort only once (under the hood) and use CSE.
    sorted_y = _sort_tensor(y)

    d = tf.cast(tf.shape(y)[-1], tf.float64)

    def _get_indices(interp_type):
      """Get values of y at the indices implied by interp_type."""
      # Note `lower` <--> ceiling.  Confusing, huh?  Due to the fact that
      # _sort_tensor sorts highest to lowest, tf.ceil corresponds to the
      # higher index, but the lower value of y!
      if interp_type == 'lower':
        indices = tf.ceil((d - 1) * frac_at_q_or_above)
      elif interp_type == 'higher':
        indices = tf.floor((d - 1) * frac_at_q_or_above)
      elif interp_type == 'nearest':
        indices = tf.round((d - 1) * frac_at_q_or_above)
      # d - 1 will be distinct from d in int32, but not necessarily double.
      # So clip to avoid out of bounds errors.
      return tf.clip_by_value(
          tf.cast(indices, tf.int32), 0, tf.shape(y)[-1] - 1)

    if interpolation in ['nearest', 'lower', 'higher']:
      gathered_y = tf.gather(sorted_y, _get_indices(interpolation), axis=-1)
    elif interpolation == 'midpoint':
      gathered_y = 0.5 * (
          tf.gather(sorted_y, _get_indices('lower'), axis=-1) +
          tf.gather(sorted_y, _get_indices('higher'), axis=-1))
    elif interpolation == 'linear':
      # Copy-paste of docstring on interpolation:
      # linear: i + (j - i) * fraction, where fraction is the fractional part
      # of the index surrounded by i and j.
      larger_y_idx = _get_indices('lower')
      exact_idx = (d - 1) * frac_at_q_or_above
      if preserve_gradients:
        # If q corresponds to a point in x, we will initially have
        # larger_y_idx == smaller_y_idx.
        # This results in the gradient w.r.t. fraction being zero (recall `q`
        # enters only through `fraction`...and see that things cancel).
        # The fix is to ensure that smaller_y_idx and larger_y_idx are always
        # separated by exactly 1.
        smaller_y_idx = tf.maximum(larger_y_idx - 1, 0)
        larger_y_idx = tf.minimum(smaller_y_idx + 1, tf.shape(y)[-1] - 1)
        fraction = tf.cast(larger_y_idx, tf.float64) - exact_idx
      else:
        smaller_y_idx = _get_indices('higher')
        fraction = tf.ceil((d - 1) * frac_at_q_or_above) - exact_idx

      fraction = tf.cast(fraction, y.dtype)
      gathered_y = (
          tf.gather(sorted_y, larger_y_idx, axis=-1) * (1 - fraction) +
          tf.gather(sorted_y, smaller_y_idx, axis=-1) * fraction)

    if keep_dims:
      if axis is None:
        ones_vec = tf.ones(
            shape=[_get_best_effort_ndims(x) + _get_best_effort_ndims(q)],
            dtype=tf.int32)
        gathered_y *= tf.ones(ones_vec, dtype=x.dtype)
      else:
        gathered_y = _insert_back_keep_dims(gathered_y, axis)

    # If q is a scalar, then result has the right shape.
    # If q is a vector, then result has trailing dim of shape q.shape, which
    # needs to be rotated to dim 0.
    return util.rotate_transpose(gathered_y, tf.rank(q))

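A quick check of the 'linear' branch above against NumPy, whose default interpolation is also linear. This assumes the function and its private helpers are in scope as defined above (the docstring refers to it as `tfp.stats.percentile`) and TF 1.x graph mode.

import numpy as np
import tensorflow as tf

x = [1., 2., 3., 4.]
pct = percentile(x, q=30., interpolation='linear')

with tf.Session() as sess:
  print(sess.run(pct))        # ==> 1.9

print(np.percentile(x, 30.))  # NumPy defaults to linear: 1.9
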
def slice_with_actions(tensor, actions):
  """Slices the given tensor with `actions` along the second dimension."""
  # `tf.assert_integer` is a static check: it raises `TypeError` at graph
  # construction time if `actions` is not of integer dtype.
  tf.assert_integer(actions)
  batch_range = tf.range(tf.shape(tensor)[0])
  indices = tf.stack([batch_range, actions], axis=-1)
  return tf.gather_nd(tensor, indices)

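A usage sketch for `slice_with_actions`, a common pattern for selecting per-example Q-values by action index in RL code; the values are illustrative and TF 1.x graph mode is assumed.

import tensorflow as tf

q_values = tf.constant([[0.1, 0.9, 0.3],
                        [0.5, 0.2, 0.7]])
actions = tf.constant([1, 2])  # must be an integer tensor, per the assert

chosen = slice_with_actions(q_values, actions)

with tf.Session() as sess:
  print(sess.run(chosen))  # ==> [0.9 0.7]
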