Exemplo n.º 1
0
    def compute_gradients(self,
                          loss,
                          var_list=None,
                          gate_gradients=optimizer.Optimizer.GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          grad_loss=None):
        """Compute gradients of `loss` for the variables in `var_list`.

    Add rho*elastic_difference to loss to control the exploration
    This is the first part of `minimize()`.  It returns a list
    of (gradient, variable) pairs where "gradient" is the gradient
    for "variable".  Note that "gradient" can be a `Tensor`, an
    `IndexedSlices`, or `None` if there is no gradient for the
    given variable.

    Args:
      loss: A Tensor containing the value to minimize.
      var_list: Optional list or tuple of `tf.Variable` to update to minimize
        `loss`.  Defaults to the list of variables collected in the graph
        under the key `GraphKey.TRAINABLE_VARIABLES`.
      gate_gradients: How to gate the computation of gradients.  Can be
        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
      aggregation_method: Specifies the method used to combine gradient terms.
        Valid values are defined in the class `AggregationMethod`.
      colocate_gradients_with_ops: If True, try colocating gradients with
        the corresponding op.
      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

    Returns:
      A list of (gradient, variable) pairs. Variable is always present, but
      gradient can be `None`.

    Raises:
      TypeError: If `var_list` contains anything else than `Variable` objects.
      ValueError: If some arguments are invalid.
    """
        if not var_list:
            var_list = variables.trainable_variables()

        elastic_difference = [
            math_ops.subtract(v, lv)
            for v, lv in zip(variables.trainable_variables(),
                             [self._local_map[var] for var in var_list])
        ]

        distance_loss = self._rho * math_ops.add_n(
            [gen_nn_ops.l2_loss(ed) for ed in elastic_difference])

        total_loss = loss + distance_loss
        return self._opt.compute_gradients(total_loss, var_list,
                                           gate_gradients, aggregation_method,
                                           colocate_gradients_with_ops,
                                           grad_loss)
  def compute_gradients(self,
                        loss,
                        var_list=None,
                        gate_gradients=optimizer.Optimizer.GATE_OP,
                        aggregation_method=None,
                        colocate_gradients_with_ops=False,
                        grad_loss=None):
    """Compute gradients of `loss` for the variables in `var_list`.

    Add rho*elastic_difference to loss to control the exploration
    This is the first part of `minimize()`.  It returns a list
    of (gradient, variable) pairs where "gradient" is the gradient
    for "variable".  Note that "gradient" can be a `Tensor`, an
    `IndexedSlices`, or `None` if there is no gradient for the
    given variable.

    Args:
      loss: A Tensor containing the value to minimize.
      var_list: Optional list or tuple of `tf.Variable` to update to minimize
        `loss`.  Defaults to the list of variables collected in the graph under
        the key `GraphKey.TRAINABLE_VARIABLES`.
      gate_gradients: How to gate the computation of gradients.  Can be
        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
      aggregation_method: Specifies the method used to combine gradient terms.
        Valid values are defined in the class `AggregationMethod`.
      colocate_gradients_with_ops: If True, try colocating gradients with the
        corresponding op.
      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

    Returns:
      A list of (gradient, variable) pairs. Variable is always present, but
      gradient can be `None`.

    Raises:
      TypeError: If `var_list` contains anything else than `Variable` objects.
      ValueError: If some arguments are invalid.
    """
    if not var_list:
      var_list = variables.trainable_variables()

    elastic_difference = [
        math_ops.subtract(v, lv)
        for v, lv in zip(variables.trainable_variables(),
                         [self._local_map[var] for var in var_list])
    ]

    distance_loss = self._rho * math_ops.add_n(
        [gen_nn_ops.l2_loss(ed) for ed in elastic_difference])

    total_loss = loss + distance_loss
    return self._opt.compute_gradients(total_loss, var_list, gate_gradients,
                                       aggregation_method,
                                       colocate_gradients_with_ops, grad_loss)
Exemplo n.º 3
0
def global_norm(t_list, name=None):
    """Computes the global norm of multiple tensors.

  Given a tuple or list of tensors `t_list`, this operation returns the
  global norm of the elements in all tensors in `t_list`. The global norm is
  computed as:

  `global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))`

  Any entries in `t_list` that are of type None are ignored.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    name: A name for the operation (optional).

  Returns:
    A 0-D (scalar) `Tensor` of type `float`.

  Raises:
    TypeError: If `t_list` is not a sequence.
  """
    if (not isinstance(t_list, collections_abc.Sequence)
            or isinstance(t_list, six.string_types)):
        raise TypeError("`t_list` should be a sequence of tensors. Received "
                        f"{type(t_list)}.")
    t_list = list(t_list)
    with ops.name_scope(name, "global_norm", t_list) as name:
        values = [
            ops.convert_to_tensor(
                t.values if isinstance(t, ops.IndexedSlices) else t,
                name="t_%d" % i) if t is not None else t
            for i, t in enumerate(t_list)
        ]
        half_squared_norms = []
        for v in values:
            if v is not None:
                with ops.colocate_with(v):
                    half_squared_norms.append(gen_nn_ops.l2_loss(v))

        half_squared_norm = math_ops.reduce_sum(
            array_ops.stack(half_squared_norms))

        norm = math_ops.sqrt(
            half_squared_norm *
            constant_op.constant(2.0, dtype=half_squared_norm.dtype),
            name="global_norm")

    return norm
Exemplo n.º 4
0
def global_norm(t_list, name=None):
  """Computes the global norm of multiple tensors.

  Given a tuple or list of tensors `t_list`, this operation returns the
  global norm of the elements in all tensors in `t_list`. The global norm is
  computed as:

  `global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))`

  Any entries in `t_list` that are of type None are ignored.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    name: A name for the operation (optional).

  Returns:
    A 0-D (scalar) `Tensor` of type `float`.

  Raises:
    TypeError: If `t_list` is not a sequence.
  """
  if (not isinstance(t_list, collections.Sequence)
      or isinstance(t_list, six.string_types)):
    raise TypeError("t_list should be a sequence")
  t_list = list(t_list)
  with ops.name_scope(name, "global_norm", t_list) as name:
    values = [
        ops.convert_to_tensor(
            t.values if isinstance(t, ops.IndexedSlices) else t,
            name="t_%d" % i)
        if t is not None else t
        for i, t in enumerate(t_list)]
    half_squared_norms = []
    for v in values:
      if v is not None:
        with ops.colocate_with(v):
          half_squared_norms.append(gen_nn_ops.l2_loss(v))

    half_squared_norm = math_ops.reduce_sum(array_ops.stack(half_squared_norms))

    norm = math_ops.sqrt(
        half_squared_norm *
        constant_op.constant(2.0, dtype=half_squared_norm.dtype),
        name="global_norm")

  return norm