Example #1
def _ensure_unique_tensor_objects(parameter_positions, args):
  """Make each of the parameter_positions in args a unique ops.Tensor object.

  Ensure that each parameter is treated independently.
  For example:

  def f(x, y): return x * y
  g = gradients_function(f)
  one = tf.constant(1.)

  g(one, one) should return [1., 1.]
  (even though the two arguments are the same Tensor object).

  Args:
    parameter_positions: List of indices into args defining the arguments to
      differentiate against.
    args: A list of arguments to the function to be differentiated.

  Returns:
    args, possibly edited in-place.
  """
  s = set()
  for (i, t) in enumerate(args):
    if i in parameter_positions:
      tid = ops.tensor_id(t)
      if tid in s:
        args[i] = gen_array_ops.identity(args[i])
      else:
        s.add(tid)
  return args
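
The behavior described in the docstring above can be observed through the public gradients_function API. A minimal sketch, assuming TF 1.x with eager execution enabled and the tf.contrib.eager import path:

import tensorflow as tf
import tensorflow.contrib.eager as tfe

tf.enable_eager_execution()

def f(x, y):
  return x * y

g = tfe.gradients_function(f)
one = tf.constant(1.)
# Even though the same Tensor object is passed twice, each argument is
# treated independently, so both gradients are 1.0.
print(g(one, one))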
Example #2
  def decorated(*args, **kwds):
    """Computes the value and gradient of the decorated function."""
    parameter_positions = _get_arg_spec(f, params, args)
    assert not kwds, "The gradient function can't take keyword arguments."
    this_tape = tape.push_new_tape(persistent=persistent)
    try:
      sources = []
      args = [
          ops.convert_to_tensor(args[i])
          if i in parameter_positions else args[i]
          for i in range(len(args))
      ]
      args = _ensure_unique_tensor_objects(parameter_positions, args)
      for i in parameter_positions:
        sources.append(args[i])
        tape.watch(this_tape, args[i])
      result = f(*args)
      if result is None:
        raise ValueError("Cannot differentiate a function that returns None; "
                         "did you forget to return a value from {}?".format(
                             f.__name__))
      flat_result = nest.flatten(result)
      flat_result = [gen_array_ops.identity(x) for x in flat_result]
      result = nest.pack_sequence_as(result, flat_result)
    finally:
      tape.pop_tape(this_tape)
    def vjp(dy=None):
      if dy is not None:
        dy = [ops.convert_to_tensor(x) for x in nest.flatten(dy)]
      return imperative_grad.imperative_grad(
          this_tape, nest.flatten(result), sources, output_gradients=dy)

    return result, vjp
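
The decorated function above returns the forward result together with a vjp closure that maps output gradients dy to gradients with respect to the watched sources. The same value-and-VJP pattern can be sketched with the public tf.GradientTape API; a minimal example assuming TF 2.x, where f below is an illustrative function:

import tensorflow as tf

def f(x):
  return x * x

x = tf.constant(3.0)
with tf.GradientTape() as tape:
  tape.watch(x)
  y = f(x)
# output_gradients plays the role of `dy` in vjp(dy): grad = dy * dy/dx.
grad = tape.gradient(y, x, output_gradients=tf.constant(2.0))
print(grad.numpy())  # 12.0 = 2.0 * (2 * 3.0)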
Example #3
  def compute_gradients(self, loss, *args, **kwargs):
    # Record current global step for worker.
    with ops.colocate_with(loss):
      self._local_step = training_util.get_global_step() + 0

    with ops.control_dependencies([self._local_step]):
      loss = gen_array_ops.identity(loss)
      return self._opt.compute_gradients(loss, *args, **kwargs)
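
The identity wrapped in control_dependencies above is the usual way to force an op to run before a tensor is consumed. A minimal self-contained sketch of that pattern, assuming TF 1.x graph mode (the counter variable is illustrative):

import tensorflow as tf

counter = tf.Variable(0, name="step_counter")
increment = counter.assign_add(1)
loss = tf.constant(2.0)
with tf.control_dependencies([increment]):
  # Any op that consumes `loss` now also forces `increment` to run first.
  loss = tf.identity(loss)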
Example #4
  def decorated(*args, **kwargs):
    """Decorated function with custom gradient."""
    if context.in_graph_mode():
      if kwargs:
        raise ValueError(
            "custom_gradient in graph mode doesn't support keyword arguments.")
      name = "CustomGradient-%s" % tf_ops.uid()
      args = [tf_ops.convert_to_tensor(x) for x in args]
      result, grad_fn = f(*args)
      flat_result = nest.flatten(result)
      all_tensors = flat_result + args

      @tf_ops.RegisterGradient(name)
      def internal_grad_fn(unused_op, *result_grads):  # pylint: disable=unused-variable
        gradients = nest.flatten(grad_fn(*result_grads[:len(flat_result)]))
        # IdentityN's inputs are [outputs..., inputs...], and we must return
        # one gradient per IdentityN input: None for the output slots,
        # followed by the gradients grad_fn computed for the inputs.
        return ([None] * len(flat_result)) + gradients

      with tf_ops.get_default_graph().gradient_override_map(
          {"IdentityN": name}):
        all_tensors = array_ops.identity_n(all_tensors)
      return nest.pack_sequence_as(
          structure=result, flat_sequence=all_tensors[:len(flat_result)])

    input_tensors = [tf_ops.convert_to_tensor(x) for x in args]

    with tape.stop_recording():
      result, grad_fn = f(*args, **kwargs)
      flat_result = nest.flatten(result)
      # TODO(apassos) consider removing the identity below.
      flat_result = [gen_array_ops.identity(x) for x in flat_result]

    def actual_grad_fn(*outputs):
      return nest.flatten(grad_fn(*outputs))

    tape.record_operation(
        f.__name__,
        flat_result,
        input_tensors,
        actual_grad_fn)
    flat_result = list(flat_result)
    return nest.pack_sequence_as(result, flat_result)
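
For reference, the decorator implemented above is exposed publicly as tf.custom_gradient. A minimal usage sketch, assuming eager execution, that defines log(1 + exp(x)) with a numerically stable gradient:

import tensorflow as tf

@tf.custom_gradient
def log1pexp(x):
  e = tf.exp(x)
  def grad(dy):
    # Stable form of the derivative; the naive gradient overflows for large x.
    return dy * (1 - 1 / (1 + e))
  return tf.math.log(1 + e), grad

x = tf.constant(100.0)
with tf.GradientTape() as tape:
  tape.watch(x)
  y = log1pexp(x)
print(tape.gradient(y, x).numpy())  # 1.0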
Example #5
def _eager_mode_decorator(f, *args, **kwargs):
  """Implement custom gradient decorator for eager mode."""
  with backprop.GradientTape() as tape:
    result, grad_fn = f(*args, **kwargs)
  all_inputs = list(args) + list(kwargs.values())
  # The variables that grad_fn needs to return gradients for are the set of
  # variables used that are *not* part of the inputs.
  variables = [v for v in set(tape.watched_variables()) if v not in all_inputs]
  grad_argspec = tf_inspect.getfullargspec(grad_fn)
  if (variables and ("variables" not in grad_argspec.args) and
      not grad_argspec.varkw):
    raise TypeError("If using @custom_gradient with a function that "
                    "uses variables, then grad_fn must accept a keyword "
                    "argument 'variables'.")
  flat_result = nest.flatten(result)
  # TODO(apassos) consider removing the identity below.
  flat_result = [gen_array_ops.identity(x) for x in flat_result]

  input_tensors = [ops.convert_to_tensor(x) for x
                   in list(args) + list(variables)]
  arg_count = len(args)
  def actual_grad_fn(*result_grads):
    """Custom grad fn wrapper."""
    if variables:
      input_grads, variable_grads = grad_fn(*result_grads, variables=variables)
      if len(variable_grads) != len(variables):
        raise ValueError("Must return gradient for each variable from "
                         "@custom_gradient grad_fn.")
    else:
      input_grads = grad_fn(*result_grads)
      variable_grads = []
    flat_grads = nest.flatten(input_grads)
    if len(flat_grads) != arg_count:
      raise ValueError(
          "custom_gradient function expected to return {} gradients "
          "but returned {} instead.".format(arg_count, len(flat_grads)))
    return flat_grads + variable_grads

  tape_lib.record_operation(f.__name__, flat_result, input_tensors,
                            actual_grad_fn)
  flat_result = list(flat_result)
  return nest.pack_sequence_as(result, flat_result)
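
The argspec check above defines the contract for functions that close over variables: grad_fn must accept a variables keyword argument and return a pair of (input gradients, variable gradients). A minimal sketch, assuming eager execution; the names scale and v are illustrative:

import tensorflow as tf

v = tf.Variable(2.0)

@tf.custom_gradient
def scale(x):
  y = v * x
  def grad(dy, variables=None):
    # One gradient per input, plus a list with one gradient per captured variable.
    return dy * v, [dy * x]
  return y, grad

x = tf.constant(3.0)
with tf.GradientTape() as tape:
  tape.watch(x)
  y = scale(x)
dx, dv = tape.gradient(y, [x, v])  # dx == 2.0, dv == 3.0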
Example #6
    def decorated(*args, **kwds):
        """Computes the value and gradient of the decorated function."""
        parameter_positions = _get_arg_spec(f, params, args)
        assert not kwds, "The gradient function can't take keyword arguments."
        this_tape = tape.push_new_tape(persistent=persistent)
        try:
            sources = []
            args = [
                ops.convert_to_tensor(args[i])
                if i in parameter_positions else args[i]
                for i in range(len(args))
            ]
            args = _ensure_unique_tensor_objects(parameter_positions, args)
            for i in parameter_positions:
                sources.append(args[i])
                tape.watch(this_tape, args[i])
            result = f(*args)
            if result is None:
                raise ValueError(
                    "Cannot differentiate a function that returns None; "
                    "did you forget to return a value from {}?".format(
                        f.__name__))
            flat_result = nest.flatten(result)
            flat_result = [gen_array_ops.identity(x) for x in flat_result]
            result = nest.pack_sequence_as(result, flat_result)
        finally:
            tape.pop_tape(this_tape)

        def vjp(dy=None):
            if dy is not None:
                dy = [ops.convert_to_tensor(x) for x in nest.flatten(dy)]
            return imperative_grad.imperative_grad(this_tape,
                                                   nest.flatten(result),
                                                   sources,
                                                   output_gradients=dy)

        return result, vjp
Example #7
def _eager_mode_decorator(f, *args, **kwargs):
  """Implement custom gradient decorator for eager mode."""
  with backprop.GradientTape() as tape:
    result, grad_fn = f(*args, **kwargs)
  all_inputs = list(args) + list(kwargs.values())
  # The variables that grad_fn needs to return gradients for are the set of
  # variables used that are *not* part of the inputs.
  variables = [v for v in set(tape.watched_variables()) if v not in all_inputs]
  grad_argspec = tf_inspect.getfullargspec(grad_fn)
  if (variables and ("variables" not in grad_argspec.args) and
      not grad_argspec.varkw):
    raise TypeError("If using @custom_gradient with a function that "
                    "uses variables, then grad_fn must accept a keyword "
                    "argument 'variables'.")
  flat_result = nest.flatten(result)
  # TODO(apassos) consider removing the identity below.
  flat_result = [gen_array_ops.identity(x) for x in flat_result]

  def actual_grad_fn(*result_grads):
    """Custom grad fn wrapper."""
    if variables:
      input_grads, variable_grads = grad_fn(*result_grads, variables=variables)
      if len(variable_grads) != len(variables):
        raise ValueError("Must return gradient for each variable from "
                         "@custom_gradient grad_fn.")
    else:
      input_grads = grad_fn(*result_grads)
      variable_grads = []
    return nest.flatten(input_grads) + variable_grads

  input_tensors = [ops.convert_to_tensor(x) for x
                   in list(args) + list(variables)]
  tape_lib.record_operation(f.__name__, flat_result, input_tensors,
                            actual_grad_fn)
  flat_result = list(flat_result)
  return nest.pack_sequence_as(result, flat_result)
Example #8
 def _AcceptGradientOp():
   with ops.control_dependencies(
       [self._opt.apply_gradients(
           grads_and_vars, global_step=global_step, name=name)]):
     return gen_array_ops.identity(0.0)
Example #9
    def _get_queue_ops_stale(self, var_update_op: ops.Operation,
                             source_op: ops.Operation, is_chief: bool,
                             is_trainable: bool) -> List[ops.Operation]:
        """
        Get queue operations for staleness synchronous parameter update.

        Maintain a list of queues of size equal to <staleness>. At the beginning of each call of this function
        (either by the chief worker or other workers), it checks whether each queue is not full. If yes, it pushes
        a token to each queue. If not, it does nothing (a no_op).
        Then, for the current worker that calls this function, it dequeues a token from its corresponding queue
        (indexed by its worker id).
        The potential enqueue operations and definite dequeue operation are grouped together, and have to be
        finished before the model moves on to the next step.
        As at each invocation of this function, a row of empty space in the list of queues will be filled. Thus
        <staleness> number of consecutive dequeue operations can be done by a worker without blocking, achieving
        stale synchronous parameter update with maximum <staleness> steps difference.

        Args:
            var_update_op: The op

        Returns:
            A list of queue operations.
        """
        var_op = var_update_op.inputs[UPDATE_OP_VAR_POS].op

        var_update_sync_queues = \
            [data_flow_ops.FIFOQueue(self._staleness, [dtypes.bool], shapes=None,
                                     name='%s_update_sync_queue_%d' % (var_op.name, i),
                                     shared_name='%s_update_sync_queue_%d' % (var_op.name, i))
             for i in range(self.num_workers)]

        # Enqueue one token to every queue if all queues are not full.
        def _enqueue_row_op():
            enqueue_ops = []
            for q in var_update_sync_queues:
                enqueue_ops.append(q.enqueue(False))
            enqueue_a_row_ops = control_flow_ops.group(*enqueue_ops)
            return enqueue_a_row_ops

        def _no_op():
            return gen_control_flow_ops.no_op()

        switch_cond = gen_array_ops.identity(True)
        for q in var_update_sync_queues:
            switch_cond = gen_math_ops.logical_and(
                switch_cond,
                gen_math_ops.less(q.size(),
                                  gen_array_ops.identity(self._staleness)))

        enqueue_a_row_ops = control_flow_ops.cond(switch_cond, _enqueue_row_op,
                                                  _no_op)

        queue_ops = [enqueue_a_row_ops]

        if is_chief:
            if is_trainable:
                var_update_deps = [
                    self._var_op_to_accum_apply_op[var_op], source_op
                ]
            else:
                var_update_deps = [var_update_op]
            with ops.control_dependencies(var_update_deps):
                dequeue = var_update_sync_queues[self.worker_id].dequeue()
        else:
            # wait for execution of var_update_op
            if is_trainable:
                with ops.control_dependencies(
                    [self._var_op_to_accum_apply_op[var_op]]):
                    dequeue = var_update_sync_queues[self.worker_id].dequeue()
            else:
                dequeue = var_update_sync_queues[self.worker_id].dequeue()
        queue_ops.append(dequeue)

        return queue_ops
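
The core of the construction above is a per-worker FIFOQueue used as a token bucket of capacity <staleness>: a token is added only while every queue has room, and each worker dequeues one token per step, blocking once it gets too far ahead. A minimal single-queue sketch, assuming TF 1.x graph mode; staleness and the queue name are illustrative:

import tensorflow as tf

staleness = 2
queue = tf.FIFOQueue(staleness, [tf.bool], shapes=None,
                     name="update_sync_queue_0")

def _enqueue_row():
  return queue.enqueue(False)

def _no_op():
  return tf.no_op()

# Add a token only while the queue still has room, mirroring the cond above.
not_full = tf.less(queue.size(), staleness)
maybe_enqueue = tf.cond(not_full, _enqueue_row, _no_op)

# The dequeue blocks once this worker is more than `staleness` steps ahead.
with tf.control_dependencies([maybe_enqueue]):
  advance = queue.dequeue()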
Example #10
 def benchmark_tf_gradient_function_no_op(self):
     with context.device(CPU):
         m = gen_array_ops.identity(self._m_2)
         self._run(lambda: backprop.gradients_function(lambda x: x, [0])(m),
                   30000)
Example #11
 def benchmark_tf_gradient_forward_identity(self):
     with backprop.GradientTape() as tape:
         m = self._m_2
         tape.watch(m)
         self._run(lambda: gen_array_ops.identity(m), 30000)
Example #12
 def benchmark_slowpath_tf_identity(self):
     self._run(lambda: gen_array_ops.identity(1), 30000)
Example #13
 def benchmark_tf_identity(self):
     m = self._m_2
     self._run(lambda: gen_array_ops.identity(m), 30000)
Example #14
 def benchmark_tf_gradient_forward_identity(self):
   with backprop.GradientTape() as tape:
     m = self._m_2
     tape.watch(m)
     self._run(lambda: gen_array_ops.identity(m), 30000)
Example #15
 def benchmark_tf_identity(self):
   m = self._m_2
   self._run(lambda: gen_array_ops.identity(m), 30000)
Example #16
 def _DropGradientOp():
   return gen_array_ops.identity(1.0)
Example #17
 def _AcceptGradientOp():
   with ops.control_dependencies(
       [self._opt.apply_gradients(
           grads_and_vars, global_step=global_step, name=name)]):
     return gen_array_ops.identity(0.0)
Example #18
 def _DropGradientOp():
     return gen_array_ops.identity(1.0)
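
Fragments like _AcceptGradientOp and _DropGradientOp above are the two branches of a conditional that either applies the gradients or skips them, each returning a scalar so the branch outputs match. A hedged sketch of how such branches are typically wired with tf.cond, assuming TF 1.x graph mode; the optimizer setup and staleness comparison below are illustrative, not the original optimizer's exact logic:

import tensorflow as tf

opt = tf.train.GradientDescentOptimizer(0.1)
var = tf.Variable(1.0)
loss = var * var
grads_and_vars = opt.compute_gradients(loss)
global_step = tf.train.get_or_create_global_step()
staleness = tf.constant(0)      # illustrative: how stale this gradient is
max_staleness = tf.constant(2)  # illustrative threshold

def _DropGradientOp():
  return tf.identity(1.0)

def _AcceptGradientOp():
  with tf.control_dependencies(
      [opt.apply_gradients(grads_and_vars, global_step=global_step)]):
    return tf.identity(0.0)

# 1.0 means the gradient was dropped, 0.0 means it was applied.
dropped = tf.cond(staleness < max_staleness, _AcceptGradientOp, _DropGradientOp)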
Example #19
 def benchmark_tf_gradient_function_no_op(self):
   with context.device(CPU):
     m = gen_array_ops.identity(self._m_2)
     self._run(lambda: backprop.gradients_function(lambda x: x, [0])(m), 30000)
Example #20
 def benchmark_slowpath_tf_identity(self):
   self._run(lambda: gen_array_ops.identity(1), 30000)