Example #1
def _compute_energy_change(current_target_log_prob,
                           current_momentums,
                           proposed_target_log_prob,
                           proposed_momentums,
                           independent_chain_ndims,
                           name=None):
    """Helper to `kernel` which computes the energy change."""
    with ops.name_scope(name, "compute_energy_change", ([
            current_target_log_prob, proposed_target_log_prob,
            independent_chain_ndims
    ] + current_momentums + proposed_momentums)):
        # Abbreviate lk0=log_kinetic_energy and lk1=proposed_log_kinetic_energy;
        # the full names are a mouthful and the short ones let us inline more.
        lk0, lk1 = [], []
        for current_momentum, proposed_momentum in zip(current_momentums,
                                                       proposed_momentums):
            axis = math_ops.range(independent_chain_ndims,
                                  array_ops.rank(current_momentum))
            lk0.append(_log_sum_sq(current_momentum, axis))
            lk1.append(_log_sum_sq(proposed_momentum, axis))

        lk0 = -np.log(2.) + math_ops.reduce_logsumexp(
            array_ops.stack(lk0, axis=-1), axis=-1)
        lk1 = -np.log(2.) + math_ops.reduce_logsumexp(
            array_ops.stack(lk1, axis=-1), axis=-1)
        lp0 = -current_target_log_prob  # log_potential
        lp1 = -proposed_target_log_prob  # proposed_log_potential
        x = array_ops.stack(
            [lp1, math_ops.exp(lk1), -lp0, -math_ops.exp(lk0)], axis=-1)

        # The sum is NaN if any element is NaN or we see both +Inf and -Inf.
        # Thus we replace such rows with an infinite energy change, which
        # implies rejection. Recall that float comparisons with NaN are always
        # False.
        is_sum_determinate = (
            math_ops.reduce_all(math_ops.is_finite(x) | (x >= 0.), axis=-1)
            & math_ops.reduce_all(math_ops.is_finite(x) | (x <= 0.), axis=-1))
        is_sum_determinate = array_ops.tile(
            is_sum_determinate[..., array_ops.newaxis],
            multiples=array_ops.concat([
                array_ops.ones(array_ops.rank(is_sum_determinate),
                               dtype=dtypes.int32),
                [4],
            ],
                                       axis=0))
        x = array_ops.where(
            is_sum_determinate, x,
            array_ops.fill(array_ops.shape(x),
                           value=x.dtype.as_numpy_dtype(np.inf)))

        return math_ops.reduce_sum(x, axis=-1)
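The comment above relies on IEEE-754 addition: a row sums to NaN if any entry is NaN or if it contains both +Inf and -Inf, so such rows are overwritten with +Inf to force rejection. A minimal NumPy sketch of that determinacy check (the sample rows are made up for illustration):

import numpy as np

x = np.array([[1.0, 2.0, -3.0, 4.0],        # ordinary row
              [np.inf, 1.0, -np.inf, 0.0],  # mixes +Inf and -Inf -> sum is NaN
              [np.nan, 1.0, 2.0, 3.0]])     # contains NaN -> sum is NaN
# A row is determinate if it has no NaN and does not mix +Inf with -Inf.
# Comparisons with NaN are always False, so NaN rows fail both tests.
is_sum_determinate = (np.all(np.isfinite(x) | (x >= 0.), axis=-1) &
                      np.all(np.isfinite(x) | (x <= 0.), axis=-1))
x = np.where(is_sum_determinate[..., np.newaxis], x, np.inf)
print(x.sum(axis=-1))  # [4. inf inf]: indeterminate rows become an infinite energy change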
Example #2
def _compute_energy_change(current_target_log_prob,
                           current_momentums,
                           proposed_target_log_prob,
                           proposed_momentums,
                           independent_chain_ndims,
                           name=None):
  """Helper to `kernel` which computes the energy change."""
  with ops.name_scope(
      name, "compute_energy_change",
      ([current_target_log_prob, proposed_target_log_prob,
        independent_chain_ndims] +
       current_momentums + proposed_momentums)):
    # Abbreviate lk0=log_kinetic_energy and lk1=proposed_log_kinetic_energy;
    # the full names are a mouthful and the short ones let us inline more.
    lk0, lk1 = [], []
    for current_momentum, proposed_momentum in zip(current_momentums,
                                                   proposed_momentums):
      axis = math_ops.range(independent_chain_ndims,
                            array_ops.rank(current_momentum))
      lk0.append(_log_sum_sq(current_momentum, axis))
      lk1.append(_log_sum_sq(proposed_momentum, axis))

    lk0 = -np.log(2.) + math_ops.reduce_logsumexp(array_ops.stack(lk0, axis=-1),
                                                  axis=-1)
    lk1 = -np.log(2.) + math_ops.reduce_logsumexp(array_ops.stack(lk1, axis=-1),
                                                  axis=-1)
    lp0 = -current_target_log_prob   # log_potential
    lp1 = -proposed_target_log_prob  # proposed_log_potential
    x = array_ops.stack([lp1, math_ops.exp(lk1), -lp0, -math_ops.exp(lk0)],
                        axis=-1)

    # The sum is NaN if any element is NaN or we see both +Inf and -Inf.
    # Thus we replace such rows with an infinite energy change, which implies
    # rejection. Recall that float comparisons with NaN are always False.
    is_sum_determinate = (
        math_ops.reduce_all(math_ops.is_finite(x) | (x >= 0.), axis=-1) &
        math_ops.reduce_all(math_ops.is_finite(x) | (x <= 0.), axis=-1))
    is_sum_determinate = array_ops.tile(
        is_sum_determinate[..., array_ops.newaxis],
        multiples=array_ops.concat([
            array_ops.ones(array_ops.rank(is_sum_determinate),
                           dtype=dtypes.int32),
            [4],
        ], axis=0))
    x = array_ops.where(is_sum_determinate,
                        x,
                        array_ops.fill(array_ops.shape(x),
                                       value=x.dtype.as_numpy_dtype(np.inf)))

    return math_ops.reduce_sum(x, axis=-1)
Example #3
def _is_all_finite(grads):
    """Returns a scalar boolean tensor indicating if all gradients are finite."""
    is_finite_per_grad = [
        math_ops.reduce_all(math_ops.is_finite(g)) for g in grads
        if g is not None
    ]
    return math_ops.reduce_all(is_finite_per_grad)
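A tiny eager-mode usage sketch of the same check, assuming the public TF 2.x API (tf.math.is_finite, tf.reduce_all); the gradient values are illustrative:

import tensorflow as tf

grads = [tf.constant([1.0, 2.0]), None, tf.constant([float('inf')])]
is_finite_per_grad = [tf.reduce_all(tf.math.is_finite(g))
                      for g in grads if g is not None]
print(tf.reduce_all(is_finite_per_grad).numpy())  # False: one gradient overflowed to Inf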
Example #4
    def adaptive_runge_kutta_step(rk_state, history, n_steps):
        """Take an adaptive Runge-Kutta step to integrate the ODE."""

        y0, f0, _, t0, dt, interp_coeff = rk_state
        check_underflow = control_flow_ops.Assert(t0 + dt > t0,
                                                  ['underflow in dt', dt])
        check_max_num_steps = control_flow_ops.Assert(
            n_steps < max_num_steps, ['max_num_steps exceeded'])
        check_numerics = control_flow_ops.Assert(
            math_ops.reduce_all(math_ops.is_finite(abs(y0))),
            ['non-finite values in state `y`', y0])

        # Gate the Runge-Kutta step on the assertions; otherwise nothing
        # depends on them and they never execute in graph mode.
        with ops.control_dependencies(
                [check_underflow, check_max_num_steps, check_numerics]):
            y1, f1, y1_error, k = _runge_kutta_step(func, y0, f0, t0, dt)

        error_tol = atol + rtol * math_ops.maximum(abs(y0), abs(y1))
        tensor_error_ratio = _abs_square(y1_error) / _abs_square(error_tol)
        error_ratio = math_ops.sqrt(math_ops.reduce_mean(tensor_error_ratio))
        accept_step = error_ratio <= 1

        y_next = control_flow_ops.cond(accept_step, lambda: y1, lambda: y0)
        f_next = control_flow_ops.cond(accept_step, lambda: f1, lambda: f0)
        t_next = control_flow_ops.cond(accept_step, lambda: t0 + dt,
                                       lambda: t0)
        interp_coeff = control_flow_ops.cond(
            accept_step, lambda: _interp_fit_rk(y0, y1, k, dt),
            lambda: interp_coeff)
        dt_next = _optimal_step_size(dt, error_ratio, safety, ifactor, dfactor)
        rk_state = _RungeKuttaState(y_next, f_next, t0, t_next, dt_next,
                                    interp_coeff)

        history = _History(_ta_append(history.integrate_points, t0 + dt),
                           _ta_append(history.error_ratio, error_ratio))
        return rk_state, history, n_steps + 1
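The accept/reject decision above is a root-mean-square test of the local error estimate against a mixed absolute/relative tolerance. A NumPy sketch with made-up numbers (atol, rtol and the error vector are illustrative, not from the source):

import numpy as np

atol, rtol = 1e-6, 1e-3
y0 = np.array([1.0, -2.0])
y1 = np.array([1.1, -2.2])
y1_error = np.array([5e-5, 2e-4])                     # local truncation error estimate
error_tol = atol + rtol * np.maximum(np.abs(y0), np.abs(y1))
error_ratio = np.sqrt(np.mean((y1_error / error_tol) ** 2))
accept_step = error_ratio <= 1                        # otherwise retry with a smaller dt
print(error_ratio, accept_step)                       # ~0.072 True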
Example #5
        def adaptive_runge_kutta_step(rk_state, history, n_steps):
            """Take an adaptive Runge-Kutta step to integrate the ODE."""
            ys0, fs0, _, t0, us0, dt, interp_coeff = rk_state
            with ops.name_scope('assertions'):
                # Python `and`/`or` would force the tensors through `bool()`;
                # use the logical ops so the check stays a graph-mode tensor.
                check_underflow = control_flow_ops.Assert(
                    math_ops.logical_or(
                        math_ops.logical_and(t0 + dt > t0, first_step > 0),
                        math_ops.logical_and(t0 + dt < t0, first_step < 0)),
                    ['underflow in dt', dt])
                check_max_num_steps = control_flow_ops.Assert(
                    n_steps < max_num_steps, ['max_num_steps exceeded'])
                check_numerics = _traverse_and_return_flattened(
                    ys0, lambda y, _: control_flow_ops.Assert(
                        math_ops.reduce_all(math_ops.is_finite(abs(y))),
                        ['non-finite values in state `y`', y]))
            with ops.control_dependencies(
                [check_underflow, check_max_num_steps] + check_numerics):
                ys1, fs1, ys1_error, ks = _runge_kutta_step(
                    func, ys0, fs0, t0, us0, dt)

            with ops.name_scope('error_ratio'):
                # We use the same approach as the dopri5 fortran code.
                error_tol = _multi_traverse_and_return_nested(
                    [ys0, ys1],
                    lambda y0, y1, _: atol + rtol * math_ops.maximum(
                        abs(y0), abs(y1)))
                tensor_error_ratio = _multi_traverse_and_return_nested(
                    [ys1_error, error_tol],
                    lambda err, tol, _: _abs_square(err) / _abs_square(tol))
                # Could also use reduce_maximum here.
                error_ratio = math_ops.sqrt(
                    math_ops.reduce_mean(
                        _traverse_and_return_flattened(
                            tensor_error_ratio,
                            lambda err, _: math_ops.reduce_mean(err))))
                accept_step = error_ratio <= 1

            with ops.name_scope('update/rk_state'):
                # If we don't accept the step, the _RungeKuttaState will be useless
                # (covering a time-interval of size 0), but that's OK, because in such
                # cases we always immediately take another Runge-Kutta step.
                ys_next = control_flow_ops.cond(accept_step, lambda: ys1,
                                                lambda: ys0)
                fs_next = control_flow_ops.cond(accept_step, lambda: fs1,
                                                lambda: fs0)
                ts_next = control_flow_ops.cond(accept_step, lambda: t0 + dt,
                                                lambda: t0)
                us_next = us0
                interp_coeff = control_flow_ops.cond(
                    accept_step, lambda: _interp_fit_rk(ys0, ys1, ks, dt),
                    lambda: interp_coeff)
                dt_next = _optimal_step_size(dt, error_ratio, safety, ifactor,
                                             dfactor)
                rk_state = _RungeKuttaState(ys_next, fs_next, t0, ts_next,
                                            us_next, dt_next, interp_coeff)

            with ops.name_scope('update/history'):
                history = _History(
                    _ta_append(history.integrate_points, t0 + dt),
                    _ta_append(history.error_ratio, error_ratio))
            return rk_state, history, n_steps + 1
Example #6
 def _compare(self, x, use_gpu):
   np_finite, np_inf, np_nan = np.isfinite(x), np.isinf(x), np.isnan(x)
   with test_util.device(use_gpu=use_gpu):
     inx = ops.convert_to_tensor(x)
     ofinite, oinf, onan = math_ops.is_finite(inx), math_ops.is_inf(
         inx), math_ops.is_nan(inx)
     tf_finite, tf_inf, tf_nan = self.evaluate([ofinite, oinf, onan])
   self.assertAllEqual(np_inf, tf_inf)
   self.assertAllEqual(np_nan, tf_nan)
   self.assertAllEqual(np_finite, tf_finite)
   self.assertShapeEqual(np_inf, oinf)
   self.assertShapeEqual(np_nan, onan)
   self.assertShapeEqual(np_finite, ofinite)
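For reference, the three NumPy predicates being compared against partition the inputs like this (the sample values are illustrative):

import numpy as np

x = np.array([1.0, np.inf, -np.inf, np.nan])
print(np.isfinite(x))  # [ True False False False]
print(np.isinf(x))     # [False  True  True False]
print(np.isnan(x))     # [False False False  True]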
Example #7
 def _compare(self, x, use_gpu):
   np_finite, np_inf, np_nan = np.isfinite(x), np.isinf(x), np.isnan(x)
   with self.test_session(
       use_gpu=use_gpu,
       force_gpu=use_gpu and test_util.is_gpu_available()) as sess:
     inx = ops.convert_to_tensor(x)
     ofinite, oinf, onan = math_ops.is_finite(inx), math_ops.is_inf(
         inx), math_ops.is_nan(inx)
     tf_finite, tf_inf, tf_nan = sess.run([ofinite, oinf, onan])
   self.assertAllEqual(np_inf, tf_inf)
   self.assertAllEqual(np_nan, tf_nan)
   self.assertAllEqual(np_finite, tf_finite)
   self.assertShapeEqual(np_inf, oinf)
   self.assertShapeEqual(np_nan, onan)
   self.assertShapeEqual(np_finite, ofinite)
Example #8
        def adaptive_runge_kutta_step(rk_state, history, n_steps):
            """Take an adaptive Runge-Kutta step to integrate the ODE."""
            y0, f0, _, t0, dt, interp_coeff = rk_state
            with ops.name_scope('assertions'):
                check_underflow = control_flow_ops.Assert(
                    t0 + dt > t0, ['underflow in dt', dt])
                check_max_num_steps = control_flow_ops.Assert(
                    n_steps < max_num_steps, ['max_num_steps exceeded'])
                check_numerics = control_flow_ops.Assert(
                    math_ops.reduce_all(math_ops.is_finite(abs(y0))),
                    ['non-finite values in state `y`', y0])
            with ops.control_dependencies(
                [check_underflow, check_max_num_steps, check_numerics]):
                y1, f1, y1_error, k = _runge_kutta_step(func, y0, f0, t0, dt)

            with ops.name_scope('error_ratio'):
                # We use the same approach as the dopri5 fortran code.
                error_tol = atol + rtol * math_ops.maximum(abs(y0), abs(y1))
                tensor_error_ratio = _abs_square(y1_error) / _abs_square(
                    error_tol)
                # Could also use reduce_maximum here.
                error_ratio = math_ops.sqrt(
                    math_ops.reduce_mean(tensor_error_ratio))
                accept_step = error_ratio <= 1

            with ops.name_scope('update/rk_state'):
                # If we don't accept the step, the _RungeKuttaState will be useless
                # (covering a time-interval of size 0), but that's OK, because in such
                # cases we always immediately take another Runge-Kutta step.
                y_next = control_flow_ops.cond(accept_step, lambda: y1,
                                               lambda: y0)
                f_next = control_flow_ops.cond(accept_step, lambda: f1,
                                               lambda: f0)
                t_next = control_flow_ops.cond(accept_step, lambda: t0 + dt,
                                               lambda: t0)
                interp_coeff = control_flow_ops.cond(
                    accept_step, lambda: _interp_fit_rk(y0, y1, k, dt),
                    lambda: interp_coeff)
                dt_next = _optimal_step_size(dt, error_ratio, safety, ifactor,
                                             dfactor)
                rk_state = _RungeKuttaState(y_next, f_next, t0, t_next,
                                            dt_next, interp_coeff)

            with ops.name_scope('update/history'):
                history = _History(
                    _ta_append(history.integrate_points, t0 + dt),
                    _ta_append(history.error_ratio, error_ratio))
            return rk_state, history, n_steps + 1
Example #9
 def _compare(self, x, use_gpu):
   with test_util.device(use_gpu=use_gpu):
     inx = ops.convert_to_tensor(x)
     ofinite, oinf, onan = math_ops.is_finite(inx), math_ops.is_inf(
         inx), math_ops.is_nan(inx)
     tf_finite, tf_inf, tf_nan = self.evaluate([ofinite, oinf, onan])
   if x.dtype == dtypes_lib.bfloat16.as_numpy_dtype:
     # Numpy will implicitly convert bfloat16 values to float16, so we cast to
     # float32 to avoid this.
     x = x.astype(np.float32)
   np_finite, np_inf, np_nan = np.isfinite(x), np.isinf(x), np.isnan(x)
   self.assertAllEqual(np_inf, tf_inf)
   self.assertAllEqual(np_nan, tf_nan)
   self.assertAllEqual(np_finite, tf_finite)
   self.assertShapeEqual(np_inf, oinf)
   self.assertShapeEqual(np_nan, onan)
   self.assertShapeEqual(np_finite, ofinite)
Example #10
    def adaptive_runge_kutta_step(rk_state, history, n_steps):
      """Take an adaptive Runge-Kutta step to integrate the ODE."""
      y0, f0, _, t0, dt, interp_coeff = rk_state
      with ops.name_scope('assertions'):
        check_underflow = control_flow_ops.Assert(t0 + dt > t0,
                                                  ['underflow in dt', dt])
        check_max_num_steps = control_flow_ops.Assert(
            n_steps < max_num_steps, ['max_num_steps exceeded'])
        check_numerics = control_flow_ops.Assert(
            math_ops.reduce_all(math_ops.is_finite(abs(y0))),
            ['non-finite values in state `y`', y0])
      with ops.control_dependencies(
          [check_underflow, check_max_num_steps, check_numerics]):
        y1, f1, y1_error, k = _runge_kutta_step(func, y0, f0, t0, dt)

      with ops.name_scope('error_ratio'):
        # We use the same approach as the dopri5 fortran code.
        error_tol = atol + rtol * math_ops.maximum(abs(y0), abs(y1))
        tensor_error_ratio = _abs_square(y1_error) / _abs_square(error_tol)
        # Could also use reduce_maximum here.
        error_ratio = math_ops.sqrt(math_ops.reduce_mean(tensor_error_ratio))
        accept_step = error_ratio <= 1

      with ops.name_scope('update/rk_state'):
        # If we don't accept the step, the _RungeKuttaState will be useless
        # (covering a time-interval of size 0), but that's OK, because in such
        # cases we always immediately take another Runge-Kutta step.
        y_next = control_flow_ops.cond(accept_step, lambda: y1, lambda: y0)
        f_next = control_flow_ops.cond(accept_step, lambda: f1, lambda: f0)
        t_next = control_flow_ops.cond(accept_step, lambda: t0 + dt, lambda: t0)
        interp_coeff = control_flow_ops.cond(
            accept_step, lambda: _interp_fit_rk(y0, y1, k, dt),
            lambda: interp_coeff)
        dt_next = _optimal_step_size(dt, error_ratio, safety, ifactor, dfactor)
        rk_state = _RungeKuttaState(y_next, f_next, t0, t_next, dt_next,
                                    interp_coeff)

      with ops.name_scope('update/history'):
        history = _History(
            _ta_append(history.integrate_points, t0 + dt),
            _ta_append(history.error_ratio, error_ratio))
      return rk_state, history, n_steps + 1
Example #11
def clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None):
    """Clips values of multiple tensors by the ratio of the sum of their norms.

  Given a tuple or list of tensors `t_list`, and a clipping ratio `clip_norm`,
  this operation returns a list of clipped tensors `list_clipped`
  and the global norm (`global_norm`) of all tensors in `t_list`. Optionally,
  if you've already computed the global norm for `t_list`, you can specify
  the global norm with `use_norm`.

  To perform the clipping, the values `t_list[i]` are set to:

      t_list[i] * clip_norm / max(global_norm, clip_norm)

  where:

      global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))

  If `clip_norm > global_norm` then the entries in `t_list` remain as they are,
  otherwise they're all shrunk by the global ratio.

  If `global_norm == infinity` then the entries in `t_list` are all set to `NaN`
  to signal that an error occurred.

  Any entries of `t_list` that are `None` are ignored.

  This is the correct way to perform gradient clipping (for example, see
  [Pascanu et al., 2012](http://arxiv.org/abs/1211.5063)
  ([pdf](http://arxiv.org/pdf/1211.5063.pdf))).

  However, it is slower than `clip_by_norm()` because all the parameters must be
  ready before the clipping operation can be performed.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    clip_norm: A 0-D (scalar) `Tensor` > 0. The clipping ratio.
    use_norm: A 0-D (scalar) `Tensor` of type `float` (optional). The global
      norm to use. If not provided, `global_norm()` is used to compute the norm.
    name: A name for the operation (optional).

  Returns:
    list_clipped: A list of `Tensors` of the same type as `t_list`.
    global_norm: A 0-D (scalar) `Tensor` representing the global norm.

  Raises:
    TypeError: If `t_list` is not a sequence.
  """
    if (not isinstance(t_list, collections_abc.Sequence)
            or isinstance(t_list, six.string_types)):
        raise TypeError("t_list should be a sequence")
    t_list = list(t_list)
    if use_norm is None:
        use_norm = global_norm(t_list, name)

    with ops.name_scope(name, "clip_by_global_norm",
                        t_list + [clip_norm]) as name:
        # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm
        scale_for_finite = clip_norm * math_ops.minimum(
            1.0 / use_norm,
            constant_op.constant(1.0, dtype=use_norm.dtype) / clip_norm)
        scale = array_ops.where(
            math_ops.is_finite(use_norm),
            scale_for_finite,
            # Return NaN if use_norm is not finite.
            constant_op.constant(float("nan"), dtype=use_norm.dtype))

        values = [
            ops.convert_to_tensor(
                t.values if isinstance(t, ops.IndexedSlices) else t,
                name="t_%d" % i) if t is not None else t
            for i, t in enumerate(t_list)
        ]

        values_clipped = []
        for i, v in enumerate(values):
            if v is None:
                values_clipped.append(None)
            else:
                with ops.colocate_with(v):
                    values_clipped.append(
                        array_ops.identity(v * scale,
                                           name="%s_%d" % (name, i)))

        list_clipped = [
            ops.IndexedSlices(c_v, t.indices, t.dense_shape) if isinstance(
                t, ops.IndexedSlices) else c_v
            for (c_v, t) in zip(values_clipped, t_list)
        ]

    return list_clipped, use_norm
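The clipping formula from the docstring can be checked directly in NumPy; the tensors and clip_norm below are made-up examples:

import numpy as np

t_list = [np.array([3.0, 4.0]), np.array([12.0])]           # L2 norms 5 and 12
clip_norm = 6.5
global_norm = np.sqrt(sum(np.sum(t ** 2) for t in t_list))  # sqrt(25 + 144) = 13
scale = clip_norm / max(global_norm, clip_norm)             # 0.5, since global_norm > clip_norm
clipped = [t * scale for t in t_list]
print(global_norm, clipped)  # 13.0 [array([1.5, 2. ]), array([6.])]

After scaling, the clipped list has global norm exactly clip_norm (6.5), matching the `t_list[i] * clip_norm / max(global_norm, clip_norm)` rule.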
Example #12
def _assign_if_finite(var, value):
    """Assigns a value to a variable if the value is finite."""
    return control_flow_ops.cond(math_ops.is_finite(value),
                                 lambda: _op_in_graph_mode(var.assign(value)),
                                 control_flow_ops.no_op)
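A minimal eager-mode sketch of the same guard, assuming the public TF 2.x API (tf.Variable, tf.math.is_finite); the variable and value are illustrative:

import tensorflow as tf

var = tf.Variable(1.0)
value = tf.constant(float('nan'))
if tf.math.is_finite(value):  # skip the update when value is NaN or Inf
    var.assign(value)
print(var.numpy())            # still 1.0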
Example #13
def matrix_exponential(input, name=None):  # pylint: disable=redefined-builtin
  r"""Computes the matrix exponential of one or more square matrices.

  exp(A) = \sum_{n=0}^\infty A^n/n!

  The exponential is computed using a combination of the scaling and squaring
  method and the Pade approximation. Details can be found in:
  Nicholas J. Higham, "The scaling and squaring method for the matrix
  exponential revisited," SIAM J. Matrix Anal. Applic., 26:1179-1193, 2005.

  The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
  form square matrices. The output is a tensor of the same shape as the input
  containing the exponential for all input submatrices `[..., :, :]`.

  Args:
    input: A `Tensor`. Must be `float16`, `float32`, `float64`, `complex64`, or
      `complex128` with shape `[..., M, M]`.
    name:  A name to give this `Op` (optional).

  Returns:
    the matrix exponential of the input.

  Raises:
    ValueError: An unsupported type is provided as input.

  @compatibility(scipy)
  Equivalent to scipy.linalg.expm
  @end_compatibility
  """
  with ops.name_scope(name, 'matrix_exponential', [input]):
    matrix = ops.convert_to_tensor(input, name='input')
    if matrix.shape[-2:] == [0, 0]:
      return matrix
    batch_shape = matrix.shape[:-2]
    if not batch_shape.is_fully_defined():
      batch_shape = array_ops.shape(matrix)[:-2]

    # reshaping the batch makes the where statements work better
    matrix = array_ops.reshape(
        matrix, array_ops.concat(([-1], array_ops.shape(matrix)[-2:]), axis=0))
    l1_norm = math_ops.reduce_max(
        math_ops.reduce_sum(
            math_ops.abs(matrix),
            axis=array_ops.size(array_ops.shape(matrix)) - 2),
        axis=-1)[..., array_ops.newaxis, array_ops.newaxis]

    const = lambda x: constant_op.constant(x, l1_norm.dtype)

    def _nest_where(vals, cases):
      assert len(vals) == len(cases) - 1
      if len(vals) == 1:
        return array_ops.where_v2(
            math_ops.less(l1_norm, const(vals[0])), cases[0], cases[1])
      else:
        return array_ops.where_v2(
            math_ops.less(l1_norm, const(vals[0])), cases[0],
            _nest_where(vals[1:], cases[1:]))

    if matrix.dtype in [dtypes.float16, dtypes.float32, dtypes.complex64]:
      maxnorm = const(3.925724783138660)
      squarings = math_ops.maximum(
          math_ops.floor(
              math_ops.log(l1_norm / maxnorm) / math_ops.log(const(2.0))), 0)
      u3, v3 = _matrix_exp_pade3(matrix)
      u5, v5 = _matrix_exp_pade5(matrix)
      u7, v7 = _matrix_exp_pade7(
          matrix /
          math_ops.cast(math_ops.pow(const(2.0), squarings), matrix.dtype))
      conds = (4.258730016922831e-001, 1.880152677804762e+000)
      u = _nest_where(conds, (u3, u5, u7))
      v = _nest_where(conds, (v3, v5, v7))
    elif matrix.dtype in [dtypes.float64, dtypes.complex128]:
      maxnorm = const(5.371920351148152)
      squarings = math_ops.maximum(
          math_ops.floor(
              math_ops.log(l1_norm / maxnorm) / math_ops.log(const(2.0))), 0)
      u3, v3 = _matrix_exp_pade3(matrix)
      u5, v5 = _matrix_exp_pade5(matrix)
      u7, v7 = _matrix_exp_pade7(matrix)
      u9, v9 = _matrix_exp_pade9(matrix)
      u13, v13 = _matrix_exp_pade13(
          matrix /
          math_ops.cast(math_ops.pow(const(2.0), squarings), matrix.dtype))
      conds = (1.495585217958292e-002, 2.539398330063230e-001,
               9.504178996162932e-001, 2.097847961257068e+000)
      u = _nest_where(conds, (u3, u5, u7, u9, u13))
      v = _nest_where(conds, (v3, v5, v7, v9, v13))
    else:
      raise ValueError('tf.linalg.expm does not support matrices of type %s' %
                       matrix.dtype)

    is_finite = math_ops.is_finite(math_ops.reduce_max(l1_norm))
    nan = constant_op.constant(np.nan, matrix.dtype)
    result = control_flow_ops.cond(
        is_finite, lambda: linalg_ops.matrix_solve(-u + v, u + v),
        lambda: array_ops.fill(array_ops.shape(matrix), nan))
    max_squarings = math_ops.reduce_max(squarings)
    i = const(0.0)

    def c(i, _):
      return control_flow_ops.cond(is_finite,
                                   lambda: math_ops.less(i, max_squarings),
                                   lambda: constant_op.constant(False))

    def b(i, r):
      return i + 1, array_ops.where_v2(
          math_ops.less(i, squarings), math_ops.matmul(r, r), r)

    _, result = control_flow_ops.while_loop(c, b, [i, result])
    if not matrix.shape.is_fully_defined():
      return array_ops.reshape(
          result,
          array_ops.concat((batch_shape, array_ops.shape(result)[-2:]), axis=0))
    return array_ops.reshape(result, batch_shape.concatenate(result.shape[-2:]))
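As the compatibility note says, the result should match scipy.linalg.expm; a quick sanity check on a diagonal matrix, where the exponential is just the elementwise exp of the diagonal (the sample matrix is illustrative):

import numpy as np
from scipy.linalg import expm

a = np.diag([0.0, 1.0])
print(expm(a))                       # [[1. 0.] [0. 2.71828...]]
print(np.diag(np.exp(np.diag(a))))   # identical for a diagonal matrix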
Example #14
def _assign_if_finite(var, value):
  """Assigns a value to a variable if the value is finite."""
  return control_flow_ops.cond(
      math_ops.is_finite(value),
      lambda: _op_in_graph_mode(var.assign(value)),
      control_flow_ops.no_op)
Example #15
def _is_all_finite(grads):
  """Returns a scalar boolean tensor indicating if all gradients are finite."""
  is_finite_per_grad = [math_ops.reduce_all(math_ops.is_finite(g))
                        for g in grads]
  return math_ops.reduce_all(is_finite_per_grad)
Example #16
def clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None):
  """Clips values of multiple tensors by the ratio of the sum of their norms.

  Given a tuple or list of tensors `t_list`, and a clipping ratio `clip_norm`,
  this operation returns a list of clipped tensors `list_clipped`
  and the global norm (`global_norm`) of all tensors in `t_list`. Optionally,
  if you've already computed the global norm for `t_list`, you can specify
  the global norm with `use_norm`.

  To perform the clipping, the values `t_list[i]` are set to:

      t_list[i] * clip_norm / max(global_norm, clip_norm)

  where:

      global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))

  If `clip_norm > global_norm` then the entries in `t_list` remain as they are,
  otherwise they're all shrunk by the global ratio.

  If `global_norm == infinity` then the entries in `t_list` are all set to `NaN`
  to signal that an error occurred.

  Any entries of `t_list` that are `None` are ignored.

  This is the correct way to perform gradient clipping (for example, see
  [Pascanu et al., 2012](http://arxiv.org/abs/1211.5063)
  ([pdf](http://arxiv.org/pdf/1211.5063.pdf))).

  However, it is slower than `clip_by_norm()` because all the parameters must be
  ready before the clipping operation can be performed.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    clip_norm: A 0-D (scalar) `Tensor` > 0. The clipping ratio.
    use_norm: A 0-D (scalar) `Tensor` of type `float` (optional). The global
      norm to use. If not provided, `global_norm()` is used to compute the norm.
    name: A name for the operation (optional).

  Returns:
    list_clipped: A list of `Tensors` of the same type as `t_list`.
    global_norm: A 0-D (scalar) `Tensor` representing the global norm.

  Raises:
    TypeError: If `t_list` is not a sequence.
  """
  if (not isinstance(t_list, collections.Sequence)
      or isinstance(t_list, six.string_types)):
    raise TypeError("t_list should be a sequence")
  t_list = list(t_list)
  if use_norm is None:
    use_norm = global_norm(t_list, name)

  with ops.name_scope(name, "clip_by_global_norm",
                      t_list + [clip_norm]) as name:
    # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm
    scale_for_finite = clip_norm * math_ops.minimum(
        1.0 / use_norm,
        constant_op.constant(1.0, dtype=use_norm.dtype) / clip_norm)
    scale = array_ops.where(
        math_ops.is_finite(use_norm),
        scale_for_finite,
        # Return NaN if use_norm is not finite.
        constant_op.constant(float("nan"), dtype=use_norm.dtype))

    values = [
        ops.convert_to_tensor(
            t.values if isinstance(t, ops.IndexedSlices) else t,
            name="t_%d" % i)
        if t is not None else t
        for i, t in enumerate(t_list)]

    values_clipped = []
    for i, v in enumerate(values):
      if v is None:
        values_clipped.append(None)
      else:
        with ops.colocate_with(v):
          values_clipped.append(
              array_ops.identity(v * scale, name="%s_%d" % (name, i)))

    list_clipped = [
        ops.IndexedSlices(c_v, t.indices, t.dense_shape)
        if isinstance(t, ops.IndexedSlices)
        else c_v
        for (c_v, t) in zip(values_clipped, t_list)]

  return list_clipped, use_norm