Example #1
  def gradient(self, target, sources):
    """Computes the gradient using information traced by the tape.

    Args:
      target: the tensor to be differentiated.
      sources: a list of Tensors or Variables, the target will be
       differentiated with respect to the sources.

    Returns:
      a list of Tensors (or IndexedSlices, or None), one for each element in
      `sources`.

    Raises:
      RuntimeError: if called inside the context of the tape, or if called more
       than once.
    """
    if self._tape is None:
      raise RuntimeError("GradientTape.gradient can only be called once "
                         "on non-persistent tapes, and "
                         "only when the context manager has exited.")
    sources = [x.handle if isinstance(x, resource_variable_ops.ResourceVariable)
               else x
               for x in sources]
    grad = imperative_grad.imperative_grad(
        _default_vspace, self._tape, [target], sources)
    if not self._persistent:
      self._tape = None
    return grad
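For context, this `gradient` method backs the public `tf.GradientTape` API. A minimal usage sketch, assuming TensorFlow 2.x eager execution (the variable names are illustrative):

import tensorflow as tf

x = tf.Variable(3.0)
with tf.GradientTape() as tape:
    y = x * x                      # ops on watched variables are recorded
dy_dx = tape.gradient(y, x)        # 6.0; a second call raises RuntimeError
                                   # because the tape is non-persistent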
Example #2
    def gradient(self, target, sources, output_gradients=None):
        """Computes the gradient using operations recorded in context of this tape.

    Args:
      target: Tensor (or list of tensors) to be differentiated.
      sources: a list or nested structure of Tensors or Variables. `target`
        will be differentiated against elements in `sources`.
      output_gradients: a list of gradients, one for each element of
        target. Defaults to None.

    Returns:
      a list or nested structure of Tensors (or IndexedSlices, or None),
      one for each element in `sources`. Returned structure is the same as
      the structure of `sources`.

    Raises:
      RuntimeError: if called inside the context of the tape, or if called more
       than once on a non-persistent tape.
    """
        if self._tape is None:
            raise RuntimeError(
                "GradientTape.gradient can only be called once on "
                "non-persistent tapes.")
        if self._recording:
            if not self._persistent:
                self._pop_tape()
            else:
                logging.log_first_n(
                    logging.WARN,
                    "Calling GradientTape.gradient on a persistent "
                    "tape inside it's context is significantly less "
                    "efficient than calling it outside the context (it "
                    "causes the gradient ops to be recorded on the "
                    "tape, leading to increased CPU and memory usage). "
                    "Only call GradientTape.gradient inside the "
                    "context if you actually want to trace the "
                    "gradient in order to compute higher order "
                    "derrivatives.", 1)

        flat_sources = nest.flatten(sources)
        flat_sources = [_handle_or_self(x) for x in flat_sources]

        if output_gradients is not None:
            output_gradients = [
                None if x is None else ops.convert_to_tensor(x)
                for x in nest.flatten(output_gradients)
            ]

        flat_grad = imperative_grad.imperative_grad(
            _default_vspace,
            self._tape,
            nest.flatten(target),
            flat_sources,
            output_gradients=output_gradients)

        if not self._persistent:
            self._tape = None

        grad = nest.pack_sequence_as(sources, flat_grad)
        return grad
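The `output_gradients` argument seeds the backward pass with an incoming cotangent per target, so the result is a weighted (vector-Jacobian) gradient. A hedged sketch with the public API, assuming TensorFlow 2.x:

import tensorflow as tf

x = tf.Variable([1.0, 2.0])
with tf.GradientTape() as tape:
    y = x * x                                  # y = [1.0, 4.0]
# d(sum(w * y))/dx = w * 2x = [1.0, 8.0]
grad = tape.gradient(y, x, output_gradients=tf.constant([0.5, 2.0]))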
Example #3
    def grad_fn(*args, **kwds):
        """Computes the gradient of the wrapped function."""
        this_tape = tape.push_new_tape()
        try:
            end_node = f(*args, **kwds)
            if end_node is None:
                raise ValueError(
                    "Cannot differentiate a function that returns None; "
                    "did you forget to return a value from {}?".format(
                        f.__name__))
        finally:
            tape.pop_tape(this_tape)
        # Note: variables are returned in construction order. This ensures unique
        # order across executions.
        variables = this_tape.watched_variables()
        if not variables:
            raise ValueError("No trainable variables were accessed while the "
                             "function was being computed.")

        sources = [v.handle for v in variables]
        for s in sources:
            if getattr(s, "is_packed", False):
                raise ValueError(
                    "GradientTape.gradient is not supported on packed EagerTensors yet."
                )
        grad = imperative_grad.imperative_grad(this_tape,
                                               nest.flatten(end_node), sources)
        return end_node, list(zip(grad, variables))
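This `grad_fn` wrapper returns the function value together with `(gradient, variable)` pairs for every trainable variable touched while running `f`. A rough public-API equivalent, offered only as a sketch (the helper name is made up):

import tensorflow as tf

def value_and_implicit_grads(f, *args):
    with tf.GradientTape() as tape:
        value = f(*args)
    variables = tape.watched_variables()       # variables accessed inside f
    grads = tape.gradient(value, variables)
    return value, list(zip(grads, variables))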
Example #4
  def grad_fn(*args):
    """Computes the gradient of the wrapped function."""
    this_tape = tape.push_new_tape()
    try:
      end_node = f(*args)
      if end_node is None:
        raise ValueError("Cannot differentiate a function that returns None; "
                         "did you forget to return a value from {}?".format(
                             f.__name__))
    finally:
      tape.pop_tape(this_tape)
    # Sorting variables by id, which is monotonically increasing in construction
    # order. This ensures unique order across executions.
    variables = list(sorted(this_tape.watched_variables(),
                            key=lambda v: v.handle._id))  # pylint: disable=protected-access
    sources = [x.handle for x in variables]

    if not sources:
      raise ValueError("No trainable variables were accessed while the "
                       "function was being computed.")
    grad = imperative_grad.imperative_grad(_default_vspace,
                                           this_tape,
                                           nest.flatten(end_node),
                                           sources)
    return end_node, list(zip(grad, variables))
Example #5
    def gradient(self, target, sources):
        """Computes the gradient using information traced by the tape.

    Args:
      target: the tensor to be differentiated.
      sources: a list of Tensors or Variables, the target will be
       differentiated with respect to the sources.

    Returns:
      a list of Tensors (or IndexedSlices, or None), one for each element in
      `sources`.

    Raises:
      RuntimeError: if called inside the context of the tape, or if called more
       than once.
    """
        if self._tape is None:
            raise RuntimeError("GradientTape.gradient can only be called once "
                               "on non-persistent tapes, and "
                               "only when the context manager has exited.")
        sources = [
            x.handle
            if isinstance(x, resource_variable_ops.ResourceVariable) else x
            for x in sources
        ]
        grad = imperative_grad.imperative_grad(_default_vspace, self._tape,
                                               [target], sources)
        if not self._persistent:
            self._tape = None
        return grad
Example #6
    def grad_fn(*args):
        """Computes the gradient of the wrapped function."""
        tape.push_new_tape()
        try:
            end_node = f(*args)
            if end_node is None:
                raise ValueError(
                    "Cannot differentiate a function that returns None; "
                    "did you forget to return a value from {}?".format(
                        f.__name__))
        finally:
            popped_tape = tape.pop_tape()
        # Sorting variables by id, which is monotonically increasing in construction
        # order. This ensures unique order across executions.
        variables = list(
            sorted(popped_tape.watched_variables(),
                   key=lambda v: v.handle._id))  # pylint: disable=protected-access
        sources = [x.handle for x in variables]

        if not sources:
            raise ValueError("No trainable variables were accessed while the "
                             "function was being computed.")
        grad = imperative_grad.imperative_grad(_default_vspace, popped_tape,
                                               nest.flatten(end_node), sources)
        return end_node, list(zip(grad, variables))
Example #7
 def vjp(dy=None):
     if dy is not None:
         dy = [ops.convert_to_tensor(x) for x in nest.flatten(dy)]
     return imperative_grad.imperative_grad(this_tape,
                                            nest.flatten(result),
                                            sources,
                                            output_gradients=dy)
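The returned `vjp` closure computes a vector-Jacobian product: given a cotangent `dy` for the forward result, it yields the matching gradients with respect to `sources`. A conceptual sketch with the public API, assuming TensorFlow 2.x (`make_vjp_like` is an illustrative name):

import tensorflow as tf

def make_vjp_like(f, x):
    x = tf.convert_to_tensor(x)
    with tf.GradientTape() as tape:
        tape.watch(x)
        result = f(x)
    def vjp(dy=None):
        # One call only: the non-persistent tape is consumed by gradient().
        return tape.gradient(result, x, output_gradients=dy)
    return result, vjp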
Example #8
 def grad_fn(*args):
     """Computes the gradient of the wrapped function."""
     tape.push_new_tape()
     end_node = f(*args)
     variables = tape.top_tape_watched_variables()
     sources = [x.handle for x in variables]
     grad = imperative_grad.imperative_grad(_default_vspace,
                                            nest.flatten(end_node), sources)
     return end_node, list(zip(grad, variables))
Example #9
 def grad_fn(*args):
   """Computes the gradient of the wrapped function."""
   tape.push_new_tape()
   end_node = f(*args)
   variables = tape.top_tape_watched_variables()
   sources = [x.handle for x in variables]
   grad = imperative_grad.imperative_grad(_default_vspace,
                                          nest.flatten(end_node),
                                          sources)
   return end_node, list(zip(grad, variables))
Example #10
  def gradient(self, target, sources, output_gradients=None):
    """Computes the gradient using operations recorded in context of this tape.

    Args:
      target: Tensor (or list of tensors) to be differentiated.
      sources: a list or nested structure of Tensors or Variables. `target`
        will be differentiated against elements in `sources`.
      output_gradients: a list of gradients, one for each element of
        target. Defaults to None.

    Returns:
      a list or nested structure of Tensors (or IndexedSlices, or None),
      one for each element in `sources`. Returned structure is the same as
      the structure of `sources`.

    Raises:
      RuntimeError: if called inside the context of the tape, or if called more
       than once on a non-persistent tape.
    """
    if self._tape is None:
      raise RuntimeError("GradientTape.gradient can only be called once on "
                         "non-persistent tapes.")
    if self._recording:
      if not self._persistent:
        self._pop_tape()
      else:
        logging.log_first_n(logging.WARN,
                            "Calling GradientTape.gradient on a persistent "
                            "tape inside it's context is significantly less "
                            "efficient than calling it outside the context (it "
                            "causes the gradient ops to be recorded on the "
                            "tape, leading to increased CPU and memory usage). "
                            "Only call GradientTape.gradient inside the "
                            "context if you actually want to trace the "
                            "gradient in order to compute higher order "
                            "derrivatives.", 1)

    flat_sources = nest.flatten(sources)
    flat_sources = [_handle_or_self(x) for x in flat_sources]

    if output_gradients is not None:
      output_gradients = [None if x is None else ops.convert_to_tensor(x)
                          for x in nest.flatten(output_gradients)]

    flat_grad = imperative_grad.imperative_grad(
        self._tape,
        nest.flatten(target),
        flat_sources,
        output_gradients=output_gradients)

    if not self._persistent:
      self._tape = None

    grad = nest.pack_sequence_as(sources, flat_grad)
    return grad
Example #11
    def grad_fn(*args):
        """Computes the gradient of the wrapped function."""
        tape.push_new_tape()
        end_node = f(*args)
        variables = tape.top_tape_watched_variables()
        sources = [x.handle for x in variables]

        if not sources:
            raise ValueError("no trainable variables were accessed while the "
                             "function was being computed.")
        grad = imperative_grad.imperative_grad(_default_vspace,
                                               tape.pop_tape(),
                                               nest.flatten(end_node), sources)
        return end_node, list(zip(grad, variables))
Example #12
  def grad_fn(*args):
    """Computes the gradient of the wrapped function."""
    tape.push_new_tape()
    end_node = f(*args)
    variables = tape.top_tape_watched_variables()
    sources = [x.handle for x in variables]

    if not sources:
      raise ValueError("no trainable variables were accessed while the "
                       "function was being computed.")
    grad = imperative_grad.imperative_grad(_default_vspace,
                                           tape.pop_tape(),
                                           nest.flatten(end_node),
                                           sources)
    return end_node, list(zip(grad, variables))
Example #13
    def gradient(self, target, sources, output_gradients=None):
        """Computes the gradient using operations recorded in context of this tape.

    Args:
      target: Tensor (or list of tensors) to be differentiated.
      sources: a list or nested structure of Tensors or Variables. `target`
        will be differentiated against elements in `sources`.
      output_gradients: a list of gradients, one for each element of
        target. Defaults to None.

    Returns:
      a list or nested structure of Tensors (or IndexedSlices, or None),
      one for each element in `sources`. Returned structure is the same as
      the structure of `sources`.

    Raises:
      RuntimeError: if called inside the context of the tape, or if called more
       than once on a non-persistent tape.
    """
        if self._tape is None:
            raise RuntimeError("GradientTape.gradient can only be called once "
                               "on non-persistent tapes, and "
                               "only when the context manager has exited.")
        flat_sources = nest.flatten(sources)
        flat_sources = [_handle_or_self(x) for x in flat_sources]

        if output_gradients is not None:
            output_gradients = [
                None if x is None else ops.convert_to_tensor(x)
                for x in nest.flatten(output_gradients)
            ]

        flat_grad = imperative_grad.imperative_grad(
            _default_vspace,
            self._tape,
            nest.flatten(target),
            flat_sources,
            output_gradients=output_gradients)

        if not self._persistent:
            self._tape = None

        grad = nest.pack_sequence_as(sources, flat_grad)
        return grad
Example #14
 def decorated(*args, **kwds):
   """Computes the value and gradient of the decorated function."""
   dy = kwds.pop("dy", None)
   if dy is not None:
     dy = ops.convert_to_tensor(dy)
   assert not kwds, "The gradient function can't take keyword arguments."
   tape.push_new_tape()
   sources = []
   args = [
       ops.convert_to_tensor(args[i]) if i in parameter_positions else args[i]
       for i in range(len(args))
   ]
   args = _ensure_unique_tensor_objects(parameter_positions, args)
   for i in parameter_positions:
     sources.append(args[i])
     tape.watch(args[i])
   result = f(*args)
   return result, imperative_grad.imperative_grad(
       _default_vspace, tape.pop_tape(), nest.flatten(result), sources,
       output_gradients=nest.flatten(dy) if dy is not None else None)
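This decorator computes the value of `f` and its gradients with respect to selected positional arguments, optionally seeded with an upstream gradient `dy`. A simplified sketch using the public API, assuming every positional argument is differentiated (names are illustrative):

import tensorflow as tf

def val_and_grad(f):
    def decorated(*args, dy=None):
        args = [tf.convert_to_tensor(a) for a in args]
        with tf.GradientTape() as tape:
            for a in args:
                tape.watch(a)                  # constants must be watched explicitly
            result = f(*args)
        return result, tape.gradient(result, args, output_gradients=dy)
    return decorated

value, grads = val_and_grad(lambda a, b: a * b)(2.0, 3.0)
# value == 6.0, grads == [3.0, 2.0]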
Example #15
 def decorated(*args, **kwds):
   """Computes the value and gradient of the decorated function."""
   dy = kwds.pop("dy", None)
   if dy is not None:
     dy = ops.convert_to_tensor(dy)
   assert not kwds, "The gradient function can't take keyword arguments."
   tape.push_new_tape()
   sources = []
   args = [
       ops.convert_to_tensor(args[i]) if i in parameter_positions else args[i]
       for i in range(len(args))
   ]
   args = _ensure_unique_tensor_objects(parameter_positions, args)
   for i in parameter_positions:
     sources.append(args[i])
     tape.watch(args[i])
   result = f(*args)
   return result, imperative_grad.imperative_grad(
       _default_vspace, nest.flatten(result), sources,
       output_gradients=nest.flatten(dy) if dy is not None else None)
Example #16
    def grad_fn(*args):
        """Computes the gradient of the wrapped function."""
        tape.push_new_tape()
        try:
            end_node = f(*args)
            if end_node is None:
                raise ValueError(
                    "Cannot differentiate a function that returns None; "
                    "did you forget to return a value from {}?".format(
                        f.__name__))
        finally:
            popped_tape = tape.pop_tape()
            variables = popped_tape.watched_variables()
        sources = [x.handle for x in variables]

        if not sources:
            raise ValueError("No trainable variables were accessed while the "
                             "function was being computed.")
        grad = imperative_grad.imperative_grad(_default_vspace, popped_tape,
                                               nest.flatten(end_node), sources)
        return end_node, list(zip(grad, variables))
Example #17
  def grad_fn(*args):
    """Computes the gradient of the wrapped function."""
    tape.push_new_tape()
    try:
      end_node = f(*args)
      if end_node is None:
        raise ValueError("Cannot differentiate a function that returns None; "
                         "did you forget to return a value from {}?".format(
                             f.__name__))
    finally:
      popped_tape = tape.pop_tape()
      variables = popped_tape.watched_variables()
    sources = [x.handle for x in variables]

    if not sources:
      raise ValueError("No trainable variables were accessed while the "
                       "function was being computed.")
    grad = imperative_grad.imperative_grad(_default_vspace,
                                           popped_tape,
                                           nest.flatten(end_node),
                                           sources)
    return end_node, list(zip(grad, variables))
Example #18
  def gradient(self, target, sources, output_gradients=None):
    """Computes the gradient using operations recorded in context of this tape.

    Args:
      target: Tensor (or list of tensors) to be differentiated.
      sources: a list or nested structure of Tensors or Variables. `target`
        will be differentiated against elements in `sources`.
      output_gradients: a list of gradients, one for each element of
        target. Defaults to None.

    Returns:
      a list or nested structure of Tensors (or IndexedSlices, or None),
      one for each element in `sources`. Returned structure is the same as
      the structure of `sources`.

    Raises:
      RuntimeError: if called inside the context of the tape, or if called more
       than once on a non-persistent tape.
    """
    if self._tape is None:
      raise RuntimeError("GradientTape.gradient can only be called once "
                         "on non-persistent tapes, and "
                         "only when the context manager has exited.")
    flat_sources = nest.flatten(sources)
    flat_sources = [_handle_or_self(x) for x in flat_sources]

    if output_gradients is not None:
      output_gradients = [None if x is None else ops.convert_to_tensor(x)
                          for x in nest.flatten(output_gradients)]

    flat_grad = imperative_grad.imperative_grad(
        _default_vspace, self._tape, nest.flatten(target), flat_sources,
        output_gradients=output_gradients)

    if not self._persistent:
      self._tape = None

    grad = nest.pack_sequence_as(sources, flat_grad)
    return grad
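Because `sources` are flattened with `nest` and the result is repacked with `nest.pack_sequence_as`, gradients come back in the same nested structure that was passed in. A small illustrative sketch, assuming TensorFlow 2.x:

import tensorflow as tf

w = tf.Variable(2.0)
b = tf.Variable(1.0)
with tf.GradientTape() as tape:
    y = w * 3.0 + b
grads = tape.gradient(y, {"w": w, "b": b})
# grads has the same keys as sources: {"w": 3.0, "b": 1.0}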
Example #19
  def grad_fn(*args, **kwds):
    """Computes the gradient of the wrapped function."""
    this_tape = tape.push_new_tape()
    try:
      end_node = f(*args, **kwds)
      if end_node is None:
        raise ValueError("Cannot differentiate a function that returns None; "
                         "did you forget to return a value from {}?".format(
                             f.__name__))
    finally:
      tape.pop_tape(this_tape)
    # Note: variables are returned in construction order. This ensures unique
    # order across executions.
    variables = this_tape.watched_variables()
    if not variables:
      raise ValueError("No trainable variables were accessed while the "
                       "function was being computed.")

    sources = [v.handle for v in variables]
    grad = imperative_grad.imperative_grad(this_tape, nest.flatten(end_node),
                                           sources)
    return end_node, list(zip(grad, variables))
Example #20
    def gradient(self,
                 target,
                 sources,
                 output_gradients=None,
                 unconnected_gradients=UnconnectedGradients.NONE):
        """Computes the gradient using operations recorded in context of this tape.

    Args:
      target: Tensor (or list of tensors) to be differentiated.
      sources: a list or nested structure of Tensors or Variables. `target`
        will be differentiated against elements in `sources`.
      output_gradients: a list of gradients, one for each element of
        target. Defaults to None.
      unconnected_gradients: a value which can either hold 'none' or 'zero' and
        alters the value which will be returned if the target and sources are
        unconnected. The possible values and effects are detailed in
        'UnconnectedGradients' and it defaults to 'none'.

    Returns:
      a list or nested structure of Tensors (or IndexedSlices, or None),
      one for each element in `sources`. Returned structure is the same as
      the structure of `sources`.

    Raises:
      RuntimeError: if called inside the context of the tape, or if called more
       than once on a non-persistent tape.
      ValueError: if the target is a variable or if unconnected gradients is
       called with an unknown value.
    """
        if self._tape is None:
            raise RuntimeError(
                "GradientTape.gradient can only be called once on "
                "non-persistent tapes.")
        if self._recording:
            if not self._persistent:
                self._pop_tape()
            else:
                logging.log_first_n(
                    logging.WARN,
                    "Calling GradientTape.gradient on a persistent "
                    "tape inside its context is significantly less "
                    "efficient than calling it outside the context (it "
                    "causes the gradient ops to be recorded on the "
                    "tape, leading to increased CPU and memory usage). "
                    "Only call GradientTape.gradient inside the "
                    "context if you actually want to trace the "
                    "gradient in order to compute higher order "
                    "derivatives.", 1)

        flat_targets = []
        for t in nest.flatten(target):
            if resource_variable_ops.is_resource_variable(t):
                with self:
                    t = ops.convert_to_tensor(t)
            flat_targets.append(t)

        flat_sources = nest.flatten(sources)
        flat_sources = [_handle_or_self(x) for x in flat_sources]

        if output_gradients is not None:
            output_gradients = [
                None if x is None else ops.convert_to_tensor(x)
                for x in nest.flatten(output_gradients)
            ]

        flat_grad = imperative_grad.imperative_grad(
            self._tape,
            flat_targets,
            flat_sources,
            output_gradients=output_gradients,
            unconnected_gradients=unconnected_gradients)

        if not self._persistent:
            self._tape = None

        grad = nest.pack_sequence_as(sources, flat_grad)
        return grad
Example #21
  def gradient(self,
               target,
               sources,
               output_gradients=None,
               unconnected_gradients=UnconnectedGradients.NONE):
    """Computes the gradient using operations recorded in context of this tape.

    Args:
      target: Tensor (or list of tensors) to be differentiated.
      sources: a list or nested structure of Tensors or Variables. `target`
        will be differentiated against elements in `sources`.
      output_gradients: a list of gradients, one for each element of
        target. Defaults to None.
      unconnected_gradients: a value which can either hold 'none' or 'zero' and
        alters the value which will be returned if the target and sources are
        unconnected. The possible values and effects are detailed in
        'UnconnectedGradients' and it defaults to 'none'.

    Returns:
      a list or nested structure of Tensors (or IndexedSlices, or None),
      one for each element in `sources`. Returned structure is the same as
      the structure of `sources`.

    Raises:
      RuntimeError: if called inside the context of the tape, or if called more
       than once on a non-persistent tape.
      ValueError: if the target is a variable or if unconnected gradients is
       called with an unknown value.
    """
    if self._tape is None:
      raise RuntimeError("GradientTape.gradient can only be called once on "
                         "non-persistent tapes.")
    if self._recording:
      if not self._persistent:
        self._pop_tape()
      else:
        logging.log_first_n(
            logging.WARN, "Calling GradientTape.gradient on a persistent "
            "tape inside its context is significantly less "
            "efficient than calling it outside the context (it "
            "causes the gradient ops to be recorded on the "
            "tape, leading to increased CPU and memory usage). "
            "Only call GradientTape.gradient inside the "
            "context if you actually want to trace the "
            "gradient in order to compute higher order "
            "derivatives.", 1)

    flat_targets = []
    for t in nest.flatten(target):
      if not t.dtype.is_floating:
        logging.vlog(
            logging.WARN, "The dtype of the target tensor must be "
            "floating (e.g. tf.float32) when calling GradientTape.gradient, "
            "got %r", t.dtype)
      if resource_variable_ops.is_resource_variable(t):
        with self:
          t = ops.convert_to_tensor(t)
      flat_targets.append(t)

    flat_sources = nest.flatten(sources)
    flat_sources_raw = flat_sources
    flat_sources = [_handle_or_self(x) for x in flat_sources]
    for t in flat_sources_raw:
      if not t.dtype.is_floating:
        logging.vlog(
            logging.WARN, "The dtype of the source tensor must be "
            "floating (e.g. tf.float32) when calling GradientTape.gradient, "
            "got %r", t.dtype)

    if output_gradients is not None:
      output_gradients = [None if x is None else ops.convert_to_tensor(x)
                          for x in nest.flatten(output_gradients)]

    flat_grad = imperative_grad.imperative_grad(
        self._tape,
        flat_targets,
        flat_sources,
        output_gradients=output_gradients,
        sources_raw=flat_sources_raw,
        unconnected_gradients=unconnected_gradients)

    if not self._persistent:
      self._tape = None

    grad = nest.pack_sequence_as(sources, flat_grad)
    return grad
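The `unconnected_gradients` argument decides what is returned for sources that play no part in computing the target: `None` by default, or zeros when `UnconnectedGradients.ZERO` is requested. An illustrative sketch, assuming TensorFlow 2.x:

import tensorflow as tf

x = tf.Variable(1.0)
z = tf.Variable(5.0)                           # watched but never used for y
with tf.GradientTape() as tape:
    y = x * x
gx, gz = tape.gradient(
    y, [x, z], unconnected_gradients=tf.UnconnectedGradients.ZERO)
# gx == 2.0, gz == 0.0 (gz would be None with the default NONE)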
Example #22
 def vjp(dy=None):
   if dy is not None:
     dy = [ops.convert_to_tensor(x) for x in nest.flatten(dy)]
   return imperative_grad.imperative_grad(
       this_tape, nest.flatten(result), sources, output_gradients=dy)
Example #23
 def vjp(dy=None):
   return imperative_grad.imperative_grad(
       _default_vspace, t, nest.flatten(result), sources,
       output_gradients=nest.flatten(dy) if dy is not None else None)
Example #24
  def gradient(self,
               target,
               sources,
               output_gradients=None,
               unconnected_gradients=UnconnectedGradients.NONE):
    """Computes the gradient using operations recorded in context of this tape.

    Note: Unless you set `persistent=True` a GradientTape can only be used to
    compute one set of gradients (or jacobians).

    Args:
      target: a list or nested structure of Tensors or Variables to be
        differentiated.
      sources: a list or nested structure of Tensors or Variables. `target`
        will be differentiated against elements in `sources`.
      output_gradients: a list of gradients, one for each element of
        target. Defaults to None.
      unconnected_gradients: a value which can either hold 'none' or 'zero' and
        alters the value which will be returned if the target and sources are
        unconnected. The possible values and effects are detailed in
        'UnconnectedGradients' and it defaults to 'none'.

    Returns:
      a list or nested structure of Tensors (or IndexedSlices, or None),
      one for each element in `sources`. Returned structure is the same as
      the structure of `sources`.

    Raises:
      RuntimeError: If called on a used, non-persistent tape.
      RuntimeError: If called inside the context of the tape.
      ValueError: If the target is a variable or if unconnected gradients is
       called with an unknown value.
    """
    if self._tape is None:
      raise RuntimeError("A non-persistent GradientTape can only be used to"
                         "compute one set of gradients (or jacobians)")
    if self._recording:
      if not self._persistent:
        self._pop_tape()
      else:
        logging.log_first_n(
            logging.WARN, "Calling GradientTape.gradient on a persistent "
            "tape inside its context is significantly less "
            "efficient than calling it outside the context (it "
            "causes the gradient ops to be recorded on the "
            "tape, leading to increased CPU and memory usage). "
            "Only call GradientTape.gradient inside the "
            "context if you actually want to trace the "
            "gradient in order to compute higher order "
            "derivatives.", 1)

    num_ndarrays = 0
    flat_targets = []
    for t in nest.flatten(target):
      if not backprop_util.IsTrainable(t):
        logging.vlog(
            logging.WARN, "The dtype of the target tensor must be "
            "floating (e.g. tf.float32) when calling GradientTape.gradient, "
            "got %r", t.dtype)
      if resource_variable_ops.is_resource_variable(t):
        with self:
          t = ops.convert_to_tensor(t)
      elif isinstance(t, np_arrays.ndarray):
        t = t.data
        num_ndarrays += 1
      flat_targets.append(t)
    # Only rewrap if all targets are ndarray. If not, prefer tensors.
    rewrap_as_ndarray = num_ndarrays == len(flat_targets)

    flat_sources = nest.flatten(sources)
    flat_sources_raw = flat_sources
    flat_sources = [_handle_or_self(x) for x in flat_sources]
    for t in flat_sources_raw:
      if not backprop_util.IsTrainable(t):
        logging.vlog(
            logging.WARN, "The dtype of the source tensor must be "
            "floating (e.g. tf.float32) when calling GradientTape.gradient, "
            "got %r", t.dtype)
      if getattr(t, "is_packed", False):
        raise ValueError(
            "GradientTape.gradient is not supported on packed EagerTensors yet."
        )

    if output_gradients is not None:
      output_gradients = [None if x is None else ops.convert_to_tensor(x)
                          for x in nest.flatten(output_gradients)]

    flat_grad = imperative_grad.imperative_grad(
        self._tape,
        flat_targets,
        flat_sources,
        output_gradients=output_gradients,
        sources_raw=flat_sources_raw,
        unconnected_gradients=unconnected_gradients)

    if not self._persistent:
      # Keep track of watched variables before setting tape to None
      self._watched_variables = self._tape.watched_variables()
      self._tape = None

    if rewrap_as_ndarray:
      def _tensor_to_ndarray(x):
        if x is not None:
          return np_arrays.tensor_to_ndarray(x)
        return None
      flat_grad = nest.map_structure(_tensor_to_ndarray, flat_grad)

    grad = nest.pack_sequence_as(sources, flat_grad)
    return grad
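The `self._persistent` branches above exist because a persistent tape may serve several `gradient` calls before being released. A usage sketch, assuming TensorFlow 2.x:

import tensorflow as tf

x = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as tape:
    y = x * x
    z = y * y                                  # z = x**4
dy_dx = tape.gradient(y, x)                    # 6.0
dz_dx = tape.gradient(z, x)                    # 4 * x**3 = 108.0
del tape                                       # free the tape's resources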
Example #25
 def vjp(dy=None):
   return imperative_grad.imperative_grad(
       _default_vspace, t, nest.flatten(result), sources,
       output_gradients=nest.flatten(dy) if dy is not None else None)