Example #1
def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
  """The gradient of a While op produced by while_loop."""
  body_graph = _get_body_graph(op)

  # Replace None gradients with zeros. This is needed because `grads` could have
  # None incoming gradients for the TensorLists. If we pass None's through, the
  # custom gradient of TensorListPopBack will create an EmptyTensorList inside
  # the FuncGraph which is undesirable.
  # TODO(b/80444525): There might be an issue with treating no gradient as zero
  # gradient in certain cases. Consider replacing None gradients with Zeros
  # for accumulators only.
  grads = [
      g if g is not None else array_ops.zeros_like(output)
      for g, output in zip(grads, op.outputs)
  ]

  body_grad_graph, args = _create_grad_func(
      body_graph, grads,
      util.unique_grad_fn_name(body_graph.name), op)

  intermediate_tensors = _get_intermediates(body_grad_graph)

  for intermediate_tensor in intermediate_tensors:
    tensor_list = list_ops.empty_tensor_list(
        element_dtype=intermediate_tensor.dtype,
        element_shape=_get_tensor_convertible_shape(intermediate_tensor.shape))
    with body_grad_graph.as_default():
      tensor_list_ph = body_grad_graph.capture(tensor_list, whitelisted=True)
      # Push the intermediate tensor to the tensor list.
      appended_tensor_list = list_ops.tensor_list_push_back(tensor_list_ph,
                                                            intermediate_tensor)
      # Add this modified tensor list to the list of outputs.
      body_grad_graph.outputs.append(appended_tensor_list)

  def grad_cond(counter, max_iters, *unused_args):
    return counter < max_iters

  loop_vars = args + body_grad_graph.external_captures
  grad_cond_name = util.unique_grad_fn_name(op.get_attr("cond").name)
  cond_grad_graph = func_graph_module.func_graph_from_py_func(
      grad_cond_name, grad_cond, loop_vars, {},
      func_graph=util.WhileCondFuncGraph(grad_cond_name))

  assert len(loop_vars) == len(body_grad_graph.inputs)
  assert len(loop_vars) == len(body_grad_graph.outputs)
  assert len(loop_vars) == len(cond_grad_graph.inputs)

  outputs = gen_functional_ops._while(
      loop_vars,
      util.create_new_tf_function(cond_grad_graph),
      util.create_new_tf_function(body_grad_graph),
      output_shapes=[t.shape for t in body_grad_graph.outputs],
      name="%s_grad" % op.name)

  _copy_handle_data(body_grad_graph.outputs, outputs)
  _maybe_set_lowering_attr(outputs[0].op)

  # outputs[0] is the loop counter.
  # outputs[1] is the total number of loop iterations.
  return outputs[2:2 + len(op.inputs)]
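
A minimal, hypothetical usage sketch of how this gradient path gets exercised: `_WhileGrad` is registered for the While op and is invoked indirectly when gradients are requested through a loop built by `while_loop` (see the later examples). The module paths and variable names below are assumptions for illustration, and a v1 graph context is assumed.

# Hypothetical sketch; _WhileGrad is never called directly.
from tensorflow.python.framework import constant_op
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import while_v2

x = constant_op.constant(2.0)
# Keep squaring x while it stays below 100; the result depends on x.
result = while_v2.while_loop(lambda v: v < 100., lambda v: v * v, [x])
# Requesting d(result)/dx dispatches to the registered While gradient
# (_WhileGrad), which builds the backward While op shown above.
dx = gradients_impl.gradients(result, [x])
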
Example #2
def While(input_, cond, body, name=None, hostmem=None):
  r"""output = input; While (Cond(output)) { output = Body(output) }.

  Args:
    input_: A list of `Tensor` objects.
      A list of input tensors whose types are T.
    cond: A function that takes 'input' and returns a tensor. If the tensor
      is a non-boolean scalar, the scalar is converted to a boolean
      according to the following rule: if the scalar is a numerical
      value, non-zero means True and zero means False; if the scalar is
      a string, non-empty means True and empty means False. If the
      tensor is not a scalar, non-emptiness means True and emptiness
      means False.
    body: A function that takes a list of tensors and returns another
      list of tensors. Both lists have the same types as specified
      by T.
    name: A name for the operation (optional).
    hostmem: A list of integers. If i is in the list, input[i] is a
      host memory tensor.

  Returns:
    A list of `Tensor` objects with the same types as `input_`;
    the output tensors whose types are T.
  """
  ret = gen_functional_ops._while(input_, cond, body, name=name)
  if hostmem:
    input_attr = attr_value_pb2.AttrValue()
    input_attr.list.i.extend(hostmem)
    ret[0].op._set_attr("_input_hostmem", input_attr)  # pylint: disable=protected-access

    output_attr = attr_value_pb2.AttrValue()
    output_attr.list.i.extend(hostmem)
    ret[0].op._set_attr("_output_hostmem", output_attr)  # pylint: disable=protected-access
  return ret
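
A hedged usage sketch for the functional `While` above: `cond` and `body` are expected to be TensorFlow functions (for example, created with `function.Defun`). The function names and constants below are illustrative assumptions, and a v1 graph context is assumed.

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import function

@function.Defun(dtypes.int32)
def MyCond(i):
  return i < 10

@function.Defun(dtypes.int32)
def MyBody(i):
  return i + 1

# Runs MyBody until MyCond returns False; the result is a one-element list.
result = While([constant_op.constant(0)], MyCond, MyBody)
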
Example #3
def While(input_, cond, body, name=None, hostmem=None):
  r"""output = input; While (Cond(output)) { output = Body(output) }.

  Args:
    input_: A list of `Tensor` objects.
      A list of input tensors whose types are T.
    cond: A function that takes 'input' and returns a tensor. If the tensor
      is a non-boolean scalar, the scalar is converted to a boolean
      according to the following rule: if the scalar is a numerical
      value, non-zero means True and zero means False; if the scalar is
      a string, non-empty means True and empty means False. If the
      tensor is not a scalar, non-emptiness means True and emptiness
      means False.
    body: A function that takes a list of tensors and returns another
      list of tensors. Both lists have the same types as specified
      by T.
    name: A name for the operation (optional).
    hostmem: A list of integers. If i is in the list, input[i] is a
      host memory tensor.

  Raises:
    ValueError: if `cond` has implicitly captured inputs or if `cond` and `body`
      have different signatures.

  Returns:
    A list of `Tensor` objects with the same types as `input_`;
    the output tensors whose types are T.
  """
  if cond.captured_inputs:
    raise ValueError("While op 'cond' argument must be a function "
                     "without implicitly captured inputs.")

  if cond.declared_input_types != body.declared_input_types:
    raise ValueError(
        "While op 'cond' and 'body' signatures do not match. %r vs %r" %
        (cond.declared_input_types, body.declared_input_types))

  if body.captured_inputs:
    cond_dtypes = list(
        body.declared_input_types) + [t.dtype for t in body.captured_inputs]

    @function.Defun(*cond_dtypes, func_name="%s_Wrapper" % cond.name)
    def CondWrapper(*args):
      """A wrapper that handles loop-carried captured inputs."""
      return cond(*args[:len(body.declared_input_types)])

    ret = gen_functional_ops._while(
        input_ + body.captured_inputs,
        CondWrapper,
        _LoopBodyCaptureWrapper(body),
        name=name)
    # Slice off the loop-carried captured inputs.
    ret = ret[:-len(body.captured_inputs)]
  else:
    ret = gen_functional_ops._while(input_, cond, body, name=name)
  if hostmem:
    input_attr = attr_value_pb2.AttrValue()
    input_attr.list.i.extend(hostmem)
    ret[0].op._set_attr("_input_hostmem", input_attr)  # pylint: disable=protected-access

    output_attr = attr_value_pb2.AttrValue()
    output_attr.list.i.extend(hostmem)
    ret[0].op._set_attr("_output_hostmem", output_attr)  # pylint: disable=protected-access
  return ret
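
A sketch of the captured-input path this version adds: when `body` implicitly captures a tensor from the outer graph, `While` carries that tensor as an extra loop variable via `_LoopBodyCaptureWrapper` and a matching `CondWrapper`, then slices it back off the result. The names below are illustrative assumptions, and a v1 graph context is assumed.

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import function

outer = constant_op.constant(5)

@function.Defun(dtypes.int32)
def SmallEnough(i):
  return i < 100

@function.Defun(dtypes.int32)
def AddOuter(i):
  return i + outer  # `outer` becomes an implicitly captured input.

# `outer` is appended internally as a loop-carried value and removed
# from the returned list before it reaches the caller.
result = While([constant_op.constant(0)], SmallEnough, AddOuter)
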
Example #4
def while_loop(cond, body, loop_vars, shape_invariants=None, name=None):
  """Like tf.while_loop, except emits a single While op."""
  flattened_loop_vars = nest.flatten(loop_vars)
  if shape_invariants is not None:
    nest.assert_same_structure(loop_vars, shape_invariants)
    flattened_shapes = nest.flatten(shape_invariants)
  else:
    flattened_shapes = [t.shape for t in flattened_loop_vars]

  del shape_invariants

  if not name:
    name = "while"

  with ops.name_scope(name) as scope:
    with ops.name_scope(None):
      cond_name = _get_unique_name(("%scond" % scope).replace("/", "_"))
      body_name = _get_unique_name(("%sbody" % scope).replace("/", "_"))

    num_outputs = len(flattened_loop_vars)

    # Add loop counter needed for computing gradients.
    flattened_loop_vars = [constant_op.constant(0., name="loop_counter")
                          ] + flattened_loop_vars

    flattened_shapes = [tensor_shape.scalar()] + flattened_shapes

    # Build a `cond` wrapper that can handle the extra counter loop_var.
    def wrapped_cond(unused_loop_counter, *loop_vars):
      return cond(*loop_vars)

    signature = [
        tensor_spec.TensorSpec(shape, t.dtype)
        for shape, t in zip(flattened_shapes, flattened_loop_vars)
    ]
    cond_graph = function.func_graph_from_py_func(
        cond_name, wrapped_cond, flattened_loop_vars, {}, signature=signature)

    # Add external_captures of cond to the list of loop vars.
    # Note that external tensors will be treated as loop invariants, i.e.,
    # the value of that tensor in each iteration is the same as it was at the
    # beginning of the loop execution.
    flattened_loop_vars = flattened_loop_vars + cond_graph.external_captures
    flattened_shapes = flattened_shapes + [
        t.shape for t in cond_graph.external_captures
    ]

    def wrapped_body(loop_counter, *args):
      """Loop body augmented with counter update.

      Args:
        loop_counter: Loop counter which needs to be incremented in the body.
        *args: List of args
          args[:num_outputs] - Args for the original loop body.
          args[num_outputs:] - External captures of cond. These get passed
            through as is.

      Returns:
        A list of tensors the same length as args.
      """
      outputs = body(*args[:num_outputs])
      if not isinstance(outputs, collections.Sequence):
        outputs = [outputs]

      # Return the external_captures of cond_graph as is, i.e., treat them as
      # loop invariants.
      # TODO(srbs): Update lowering code to create _Enter nodes with
      # is_constant=True for inputs that are directly passed to outputs.
      return [loop_counter + 1] + list(outputs) + list(args[num_outputs:])

    signature = [
        tensor_spec.TensorSpec(shape, t.dtype)
        for shape, t in zip(flattened_shapes, flattened_loop_vars)
    ]
    body_graph = function.func_graph_from_py_func(
        body_name, wrapped_body, flattened_loop_vars, {}, signature=signature)
    # Add external captures of body to the list of loop vars.
    # Note that external tensors will be treated as loop invariants, i.e.,
    # the value of that tensor in each iteration is the same as it was at the
    # beginning of the loop execution.
    flattened_loop_vars = flattened_loop_vars + body_graph.external_captures
    # TODO(srbs): Update lowering code to create _Enter nodes with
    # is_constant=True for inputs that are directly passed to outputs.
    body_graph.outputs.extend(body_graph.internal_captures)

    # Capture `external_captures` of `body_graph` in `cond_graph` so that it
    # expects to receive those as arguments.
    # TODO(srbs): Dedup tensors that are captured in both the cond and body.
    # This logic already exists in cond_v2.
    with cond_graph.as_default():
      for external_capture in body_graph.external_captures:
        cond_graph.capture(external_capture)

    # Export all tensors in the loop body that may be needed for gradient
    # computation. We do this by accumulating the intermediate values in
    # TensorLists.
    intermediate_tensors = _get_intermediates(body_graph)

    for intermediate_tensor in intermediate_tensors:
      # TODO(srbs): Cache and re-use empty tensor lists.
      tensor_list = list_ops.empty_tensor_list(
          element_dtype=intermediate_tensor.dtype,
          element_shape=_get_tensor_convertible_shape(
              intermediate_tensor.shape))
      flattened_loop_vars.append(tensor_list)
      with cond_graph.as_default():
        # Add a placeholder to cond_graph's inputs corresponding to the
        # tensor_list.
        cond_graph.capture(tensor_list)
      with body_graph.as_default():
        # Push the intermediate tensor to the tensor list. This captures the
        # `tensor_list` as well.
        appended_tensor_list = list_ops.tensor_list_push_back(
            tensor_list,
            intermediate_tensor)
        # Add this modified tensor list to the list of outputs.
        body_graph.outputs.append(appended_tensor_list)

    # Make sure that the shapes of the loop outputs are compatible with the
    # shape invariants, or the shapes of the loop vars if the invariants are not
    # specified.
    _check_shapes_compat(body_graph.outputs[1:1 + num_outputs],
                         flattened_shapes[1:1 + num_outputs],
                         flattened_loop_vars[1:1 + num_outputs])
    outputs = gen_functional_ops._while(
        flattened_loop_vars,
        cond_v2._create_new_tf_function(cond_graph),
        cond_v2._create_new_tf_function(body_graph),
        output_shapes=[t.shape for t in body_graph.outputs],
        name=scope)

    _copy_handle_data(body_graph.outputs, outputs)
    _maybe_set_lowering_attr(outputs[0].op)

  # First var is loop counter.
  if num_outputs == 1:
    return outputs[1]
  else:
    return nest.pack_sequence_as(loop_vars, outputs[1:1 + num_outputs])
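
A minimal usage sketch for this `while_loop`: it takes the same (cond, body, loop_vars) triple as tf.while_loop but emits a single While op. The variable names below are assumptions for illustration, and a v1 graph context is assumed.

from tensorflow.python.framework import constant_op

i0 = constant_op.constant(0)
s0 = constant_op.constant(0)
# Sum the integers 0..9; cond and body receive the unpacked loop vars.
i_final, s_final = while_loop(
    cond=lambda i, s: i < 10,
    body=lambda i, s: (i + 1, s + i),
    loop_vars=(i0, s0))
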
Example #5
def while_loop(cond,
               body,
               loop_vars,
               shape_invariants=None,
               maximum_iterations=None,
               name=None,
               return_same_structure=True):
  """Like tf.while_loop, except emits a single While op."""
  maximum_iterations = _validate_and_convert_to_tensor(maximum_iterations)
  # Keep the original loop_vars around to know which args were TensorArrays.
  orig_loop_vars = loop_vars
  # Cache its length since we use it at multiple places below.
  len_orig_loop_vars = len(orig_loop_vars)

  # Convert TensorArrays to their flow variables. These get converted back to
  # TensorArrays before calling `cond` and `body`. See `wrapped_cond` and
  # `wrapped_body` below.
  loop_vars = list(_tensor_array_to_flow(orig_loop_vars))
  loop_vars = nest.map_structure(
      ops.internal_convert_to_tensor_or_indexed_slices, loop_vars)
  if shape_invariants is not None:
    nest.assert_same_structure(orig_loop_vars, shape_invariants)
  else:
    shape_invariants = nest.map_structure(lambda t: t.shape, loop_vars)

  if not name:
    name = "while"

  with ops.name_scope(name) as scope:
    with ops.name_scope(None):
      cond_name = util.unique_fn_name(scope, "cond")
      body_name = util.unique_fn_name(scope, "body")

    loop_counter = constant_op.constant(
        0,
        dtype=maximum_iterations.dtype
        if maximum_iterations is not None else None,
        name="loop_counter")
    # Add loop counter needed for computing gradients.
    loop_vars = [loop_counter] + loop_vars

    shape_invariants = type(shape_invariants)([tensor_shape.scalar()
                                              ]) + shape_invariants

    # Automatic control dependencies are added in defuns, but not in v1
    # graphs. Propagate that behavior here.
    add_control_dependencies = util.in_defun()

    # Build a `cond` wrapper that can handle the extra counter loop_var.
    def wrapped_cond(loop_counter, *args):
      # Convert the flow variables in `args` to TensorArrays. `args` should
      # already have the same structure as `orig_loop_vars` but currently there
      # is no nest.zip so we call `_pack_sequence_as` which flattens both
      # `orig_loop_vars` and `args`, converts flows in `args` to TensorArrays
      # and packs it into the structure of `orig_loop_vars`.
      if maximum_iterations is None:
        return cond(*_pack_sequence_as(orig_loop_vars, args))
      else:
        return math_ops.logical_and(
            loop_counter < maximum_iterations,
            cond(*_pack_sequence_as(orig_loop_vars, args)))

    cond_graph = func_graph_module.func_graph_from_py_func(
        cond_name,
        wrapped_cond,
        loop_vars, {},
        signature=_build_signature(loop_vars, shape_invariants),
        func_graph=util.WhileCondFuncGraph(cond_name),
        add_control_dependencies=add_control_dependencies)

    # Add external_captures of cond to the list of loop vars.
    # Note that external tensors will be treated as loop invariants, i.e.,
    # the value of that tensor in each iteration is the same as it was at the
    # beginning of the loop execution.
    loop_vars = loop_vars + cond_graph.external_captures
    shape_invariants = shape_invariants + type(shape_invariants)(
        [t.shape for t in cond_graph.external_captures])

    def wrapped_body(loop_counter, *args):
      """Loop body augmented with counter update.

      Args:
        loop_counter: Loop counter which needs to be incremented in the body.
        *args: List of args
          args[:len_orig_loop_vars] - Args for the original loop body.
          args[len_orig_loop_vars:] - External captures of cond. These get
            passed through as is.

      Returns:
        A list of tensors the same length as args.
      """
      # Convert the flow variables in `args` to TensorArrays. `args` should
      # already have the same structure as `orig_loop_vars` but currently there
      # is no nest.zip so we call `_pack_sequence_as` which flattens both
      # `orig_loop_vars` and `args`, converts flows in `args` to TensorArrays
      # and packs it into the structure of `orig_loop_vars`.
      outputs = body(
          *_pack_sequence_as(orig_loop_vars, args[:len_orig_loop_vars]))
      if not nest.is_sequence(outputs):
        outputs = [outputs]
      # Compare the structure of input and output of body converting the
      # top-level tuples to list to be compatible with legacy while_loop.
      nest.assert_same_structure(list(outputs), list(orig_loop_vars))

      outputs = _tensor_array_to_flow(outputs)

      # Return the external_captures of cond_graph as is, i.e., treat them as
      # loop invariants.
      # TODO(srbs): Update lowering code to create _Enter nodes with
      # is_constant=True for inputs that are directly passed to outputs.
      return [loop_counter + 1] + list(outputs) + list(
          args[len_orig_loop_vars:])

    body_graph = func_graph_module.func_graph_from_py_func(
        body_name,
        wrapped_body,
        loop_vars, {},
        signature=_build_signature(loop_vars, shape_invariants),
        func_graph=util.WhileBodyFuncGraph(body_name),
        add_control_dependencies=add_control_dependencies)
    # Add external captures of body to the list of loop vars.
    # Note that external tensors will be treated as loop invariants, i.e.,
    # the value of that tensor in each iteration is the same as it was at the
    # beginning of the loop execution.
    loop_vars = loop_vars + body_graph.external_captures
    # TODO(srbs): Update lowering code to create _Enter nodes with
    # is_constant=True for inputs that are directly passed to outputs.
    body_graph.outputs.extend(body_graph.internal_captures)

    # Capture `external_captures` of `body_graph` in `cond_graph` so that it
    # expects to receive those as arguments.
    # TODO(b/118457764): Dedup tensors that are captured in both the cond and
    # body. This logic already exists in cond_v2.
    with cond_graph.as_default():
      for external_capture in body_graph.external_captures:
        assert external_capture not in cond_graph.captures, (
            "Looks like both cond and body are capturing the same tensor %s. "
            "This is not supported yet. For now consider passing,"
            " this as a loop variable." % str(external_capture))
        cond_graph.capture(external_capture)

    # Export all tensors in the loop body that may be needed for gradient
    # computation. We do this by accumulating the intermediate values in
    # TensorLists.
    intermediate_tensors = _get_intermediates(body_graph)

    for intermediate_tensor in intermediate_tensors:
      tensor_list = list_ops.empty_tensor_list(
          element_dtype=intermediate_tensor.dtype,
          element_shape=intermediate_tensor.shape,
          max_num_elements=maximum_iterations)
      loop_vars.append(tensor_list)
      with cond_graph.as_default():
        # Add a placeholder to cond_graph's inputs corresponding to the
        # tensor_list.
        cond_graph.capture(tensor_list)
      with body_graph.as_default():
        # Push the intermediate tensor to the tensor list. This captures the
        # `tensor_list` as well.
        appended_tensor_list = list_ops.tensor_list_push_back(
            tensor_list,
            intermediate_tensor)
        # Add this modified tensor list to the list of outputs.
        body_graph.outputs.append(appended_tensor_list)

    # Make sure that the shapes of the loop outputs are compatible with the
    # shape invariants, or the shapes of the loop vars if the invariants are not
    # specified.
    num_flattened_outputs = len(nest.flatten(orig_loop_vars))
    _check_shapes_compat(
        body_graph.outputs[1:1 + num_flattened_outputs],
        nest.flatten(shape_invariants[1:1 + len_orig_loop_vars]),
        nest.flatten(loop_vars[1:1 + len_orig_loop_vars]))
    flattened_loop_vars = nest.flatten(loop_vars)
    _check_num_inputs_outputs(cond_graph, body_graph,
                              len(flattened_loop_vars))

    outputs = gen_functional_ops._while(
        flattened_loop_vars,
        util.create_new_tf_function(cond_graph),
        util.create_new_tf_function(body_graph),
        output_shapes=[t.shape for t in body_graph.outputs],
        name=scope)

    _copy_handle_data(body_graph.outputs, outputs)
    util.maybe_set_lowering_attr(outputs[0].op)
    _maybe_set_maximum_iterations_attr(outputs[0].op, maximum_iterations)

    # Return identities for each output of the While op, rather than the output
    # of the While op directly. This makes pruning work if the output of
    # while_loop() is fetched: the lowering pass converts the While outputs into
    # IdentityN outputs, which if fetched will cause all ops in the body to be
    # run (since it takes all exit ops as input). After lowering, each output
    # identity op will end up with only the appropriate exit op as input.
    outputs = tuple(array_ops.identity(t) for t in outputs)

  # First var is loop counter.
  outputs = _pack_sequence_as(orig_loop_vars,
                              outputs[1:1 + num_flattened_outputs])

  if return_same_structure:
    return outputs

  flattened_outputs = nest.flatten(outputs)
  if len(flattened_outputs) == 1:
    return flattened_outputs[0]
  else:
    return outputs
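
A sketch of the `maximum_iterations` behavior added in this version: the counter-based wrapper above folds `loop_counter < maximum_iterations` into the user cond, so the loop stops after at most that many iterations. The names below are illustrative assumptions, under a v1 graph context.

from tensorflow.python.framework import constant_op

x0 = constant_op.constant(1.0)
# Doubles x at most 5 times, even though the cond alone would allow more.
(x_final,) = while_loop(
    cond=lambda x: x < 1000.,
    body=lambda x: (x * 2.,),
    loop_vars=(x0,),
    maximum_iterations=5,
    return_same_structure=True)
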
Example #6
def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
  """The gradient of a While op produced by while_loop."""
  body_graph = _get_body_graph(op)

  # Set the incoming gradient of TensorArray handles to None. The gradient
  # implementation currently assumes all resource tensors correspond to float32
  # ResourceVariables, which can lead to runtime shape errors when used with a
  # TensorArray. This is a workaround until TensorArrays are reimplemented with
  # TensorLists instead of resources.
  # Also set the incoming gradient of non-trainable inputs to None. It is
  # possible that we receive non-None gradients for non-trainable types in
  # nested while loops because we accumulate outputs of the inner while as
  # variant tensors which are trainable and hence receive zeros_like tensors in
  # the gradient pass. The non-trainable tensors then receive the popped zeros
  # tensor from this zeros variant. The gradient for the loop vars corresponding
  # to these tensors is None or zeros (this happens only if the loop var is
  # accumulated as well) in _grad_fn so we reset these.
  # TODO(b/118712257): Remove the IsTrainable filter once we can handle None
  # output grads in _grad_fn.
  grads = [
      None if _is_tensor_array_handle(output) or
      not gradients_impl.IsTrainable(output) else grad
      for grad, output in zip(grads, op.outputs)
  ]

  # Ensure that all non-resource trainable outputs have incoming gradients.
  assert all(g is not None or o.dtype == dtypes.resource or
             not gradients_impl.IsTrainable(o)
             for o, g in zip(op.outputs, grads)
            ), "All trainable loop vars must receive incoming gradients."
  # We compute the gradient for the sub-graph between trainable ys and xs
  # with non-None incoming gradients. We later pad the None's to the list of
  # outputs.
  ys, xs, non_none_grads = zip(*[(y, x, grad) for (y, x, grad) in zip(
      body_graph.outputs, body_graph.inputs, grads) if grad is not None])

  body_grad_graph, args = _create_grad_func(
      ys, xs, non_none_grads, body_graph,
      util.unique_grad_fn_name(body_graph.name), op)

  intermediate_tensors = _get_intermediates(body_grad_graph)

  maximum_iterations = op.get_attr(
      "_maximum_iterations") if _is_in_xla_context() else None
  assert not _is_in_xla_context() or maximum_iterations is not None
  for intermediate_tensor in intermediate_tensors:
    tensor_list = list_ops.empty_tensor_list(
        element_dtype=intermediate_tensor.dtype,
        element_shape=intermediate_tensor.shape,
        max_num_elements=maximum_iterations)

    with body_grad_graph.as_default():
      tensor_list_ph = body_grad_graph.capture(tensor_list, whitelisted=True)
      # Push the intermediate tensor to the tensor list.
      appended_tensor_list = list_ops.tensor_list_push_back(tensor_list_ph,
                                                            intermediate_tensor)
      # Add this modified tensor list to the list of outputs.
      body_grad_graph.outputs.append(appended_tensor_list)

  def grad_cond(counter, max_iters, *unused_args):
    return counter < max_iters

  loop_vars = args + body_grad_graph.external_captures
  grad_cond_name = util.unique_grad_fn_name(op.get_attr("cond").name)
  cond_grad_graph = func_graph_module.func_graph_from_py_func(
      grad_cond_name, grad_cond, loop_vars, {},
      func_graph=util.WhileCondFuncGraph(grad_cond_name))

  _check_num_inputs_outputs(cond_grad_graph, body_grad_graph, len(loop_vars))

  outputs = gen_functional_ops._while(
      loop_vars,
      util.create_new_tf_function(cond_grad_graph),
      util.create_new_tf_function(body_grad_graph),
      output_shapes=[t.shape for t in body_grad_graph.outputs],
      name="%s_grad" % op.name)

  _copy_handle_data(body_grad_graph.outputs, outputs)
  util.maybe_set_lowering_attr(outputs[0].op)
  _maybe_set_maximum_iterations_attr(outputs[0].op, maximum_iterations)

  # See comment in while_loop.
  outputs = [array_ops.identity(t) for t in outputs]

  # Set None as the output gradient for tensors with None input gradient
  # e.g. TensorArray handles.
  # outputs[0] is the loop counter.
  # outputs[1] is the total number of loop iterations.
  index = 2
  none_padded_outputs = []
  for g in grads:
    if g is None:
      none_padded_outputs.append(None)
    else:
      none_padded_outputs.append(outputs[index])
      index += 1
  return none_padded_outputs
Example #7
def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
    """The gradient of a While op produced by while_loop."""
    body_graph = _get_body_graph(op)

    # Set the incoming gradient of TensorArray handles to None.
    # TODO(b/118164915): We need a way of distinguishing between TensorArray
    # resource handles and ResourceVariables and set the default gradient of
    # only the TensorArray handle to None.
    grads = [
        None if output.dtype == dtypes.resource else g
        for g, output in zip(grads, op.outputs)
    ]

    # Ensure that all non-resource trainable outputs have incoming gradients.
    assert all(g is not None or o.dtype == dtypes.resource
               or not gradients_impl.IsTrainable(o)
               for o, g in zip(op.outputs, grads)
               ), "All trainable loop vars must receive incoming gradients."
    # We compute the gradient for the sub-graph between trainable ys and xs
    # with non-None incoming gradients. We later pad the None's to the list of
    # outputs.
    ys, xs, non_none_grads = zip(
        *[(y, x, grad)
          for (y, x, grad) in zip(body_graph.outputs, body_graph.inputs, grads)
          if grad is not None])

    body_grad_graph, args = _create_grad_func(
        ys, xs, non_none_grads, body_graph,
        util.unique_grad_fn_name(body_graph.name), op)

    intermediate_tensors = _get_intermediates(body_grad_graph)

    for intermediate_tensor in intermediate_tensors:
        tensor_list = list_ops.empty_tensor_list(
            element_dtype=intermediate_tensor.dtype,
            element_shape=_get_tensor_convertible_shape(
                intermediate_tensor.shape))
        with body_grad_graph.as_default():
            tensor_list_ph = body_grad_graph.capture(tensor_list,
                                                     whitelisted=True)
            # Push the intermediate tensor to the tensor list.
            appended_tensor_list = list_ops.tensor_list_push_back(
                tensor_list_ph, intermediate_tensor)
            # Add this modified tensor list to the list of outputs.
            body_grad_graph.outputs.append(appended_tensor_list)

    def grad_cond(counter, max_iters, *unused_args):
        return counter < max_iters

    loop_vars = args + body_grad_graph.external_captures
    grad_cond_name = util.unique_grad_fn_name(op.get_attr("cond").name)
    cond_grad_graph = func_graph_module.func_graph_from_py_func(
        grad_cond_name,
        grad_cond,
        loop_vars, {},
        func_graph=util.WhileCondFuncGraph(grad_cond_name))

    assert len(loop_vars) == len(body_grad_graph.inputs)
    assert len(loop_vars) == len(body_grad_graph.outputs)
    assert len(loop_vars) == len(cond_grad_graph.inputs)

    outputs = gen_functional_ops._while(
        loop_vars,
        util.create_new_tf_function(cond_grad_graph),
        util.create_new_tf_function(body_grad_graph),
        output_shapes=[t.shape for t in body_grad_graph.outputs],
        name="%s_grad" % op.name)

    _copy_handle_data(body_grad_graph.outputs, outputs)
    _maybe_set_lowering_attr(outputs[0].op)

    # Set None as the output gradient for tensors with None input gradient
    # e.g. TensorArray handles.
    # outputs[0] is the loop counter.
    # outputs[1] is the total number of loop iterations.
    index = 2
    none_padded_outputs = []
    for g in grads:
        if g is None:
            none_padded_outputs.append(None)
        else:
            none_padded_outputs.append(outputs[index])
            index += 1
    return none_padded_outputs
Example #8
def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
    """The gradient of a While op produced by while_loop."""
    # Note that op is not always the same as while_op because the gradient tape,
    # for eager mode compatibility, forgets information about the proper op. Since
    # the loop cannot run in eager mode, however, we can safely introspect into
    # the graph here.
    while_op = op.outputs[0].op
    cond_graph = _get_graph(while_op, "cond")
    body_graph = _get_graph(while_op, "body")
    orig_num_params = len(body_graph.outputs)

    maximum_iterations = op.inputs[1]
    parallel_iterations = op.get_attr("parallel_iterations")

    grads = [
        _preprocess_grad(grad, body_out,
                         while_out) for grad, body_out, while_out in zip(
                             grads, body_graph.outputs, while_op.outputs)
    ]

    # We compute the gradient for the sub-graph between trainable ys and xs
    # with non-None incoming gradients. We later pad the None's to the list of
    # outputs.
    ys, xs, non_none_grads = zip(
        *[(y, x, grad)
          for (y, x, grad) in zip(body_graph.outputs, body_graph.inputs, grads)
          if grad is not None])

    body_grad_graph, args = _create_grad_func(
        ys, xs, non_none_grads, cond_graph, body_graph,
        util.unique_grad_fn_name(body_graph.name), op, maximum_iterations)

    if body_grad_graph.while_op_needs_rewrite:
        # Modify 'op' to output the intermediate accumulators needed by the grad
        # function.
        # NOTE(skyewm): if there are any active sessions, this modification to `op`
        # may make them unrunnable!

        cond_graph.name += "_rewritten"
        body_graph.name += "_rewritten"

        new_inputs = body_grad_graph.empty_tensor_lists
        new_outputs = body_graph.outputs[orig_num_params:]

        while_op._set_func_attr("cond",
                                util.create_new_tf_function(cond_graph))
        while_op._set_func_attr("body",
                                util.create_new_tf_function(body_graph))
        while_op._set_type_list_attr("T", body_graph.output_types)
        while_op._set_shape_list_attr("output_shapes",
                                      body_graph.output_shapes)
        while_op._add_while_inputs(new_inputs)
        while_op._add_outputs([t.dtype for t in new_outputs],
                              [t.shape for t in new_outputs])
        _copy_handle_data(new_outputs, op.outputs[orig_num_params:])

    captured_inputs = _resolve_grad_captures(body_graph, body_grad_graph,
                                             while_op)
    loop_vars = args + captured_inputs

    # This modifies body_grad_graph.
    loop_vars = while_v2_indexed_slices_rewriter.rewrite_grad_indexed_slices(
        grads, body_grad_graph, loop_vars, while_op.inputs)

    def grad_cond(counter, unused_maximum_iterations_arg, forward_loop_iters,
                  *unused_args):
        return counter < forward_loop_iters

    grad_cond_name = util.unique_grad_fn_name(op.get_attr("cond").name)
    cond_grad_graph = func_graph_module.func_graph_from_py_func(
        grad_cond_name,
        grad_cond,
        loop_vars, {},
        func_graph=util.WhileCondFuncGraph(grad_cond_name))

    _check_num_inputs_outputs(cond_grad_graph, body_grad_graph, len(loop_vars))

    outputs = gen_functional_ops._while(
        loop_vars,
        util.create_new_tf_function(cond_grad_graph),
        util.create_new_tf_function(body_grad_graph),
        output_shapes=[t.shape for t in body_grad_graph.outputs],
        parallel_iterations=parallel_iterations,
        name="%s_grad" % while_op.name)
    grad_op = outputs[0].op

    _copy_handle_data(body_grad_graph.outputs, outputs)
    util.maybe_set_lowering_attr(grad_op)
    util.maybe_propagate_compile_time_consts_in_xla(grad_op)

    # See comment in while_loop.
    outputs = [array_ops.identity(t) for t in outputs]
    return _get_structured_grad_output(outputs, grads, body_grad_graph)
Example #9
def while_loop(cond,
               body,
               loop_vars,
               shape_invariants=None,
               parallel_iterations=10,
               maximum_iterations=None,
               name=None,
               return_same_structure=True):
    """Like tf.while_loop, except emits a single While op."""
    # Keep the original loop_vars around to know which args were TensorArrays.
    orig_loop_vars = loop_vars
    # Cache its length since we use it at multiple places below.
    len_orig_loop_vars = len(orig_loop_vars)

    # Convert TensorArrays to their flow variables. These get converted back to
    # TensorArrays before calling `cond` and `body`. See `wrapped_cond` and
    # `wrapped_body` below.
    loop_vars = list(_tensor_array_to_flow(orig_loop_vars))
    loop_vars = nest.map_structure(
        ops.internal_convert_to_tensor_or_indexed_slices, loop_vars)
    if shape_invariants is not None:
        nest.assert_same_structure(orig_loop_vars, shape_invariants)
    else:
        shape_invariants = nest.map_structure(lambda t: t.shape, loop_vars)

    if not name:
        name = "while"

    with ops.name_scope(name) as scope:
        with ops.name_scope(None):
            cond_name = util.unique_fn_name(scope, "cond")
            body_name = util.unique_fn_name(scope, "body")
        maximum_iterations_loop_var = _build_maximum_iterations_loop_var(
            maximum_iterations)
        loop_counter = constant_op.constant(
            0,
            dtype=maximum_iterations_loop_var.dtype
            if maximum_iterations is not None else None,
            name="loop_counter")
        # Add loop counter needed for computing gradients.
        loop_vars = [loop_counter, maximum_iterations_loop_var] + loop_vars

        shape_invariants = type(shape_invariants)(
            [tensor_shape.scalar(),
             tensor_shape.scalar()]) + shape_invariants

        # Automatic control dependencies are added in defuns, but not in v1
        # graphs. Propagate that behavior here.
        add_control_dependencies = ops.get_default_graph(
        )._add_control_dependencies

        # Build a `cond` wrapper that can handle the extra counter loop_var.
        def wrapped_cond(loop_counter, maximum_iterations_arg, *args):
            # Convert the flow variables in `args` to TensorArrays. `args` should
            # already have the same structure as `orig_loop_vars` but currently there
            # is no nest.zip so we call `_pack_sequence_as` which flattens both
            # `orig_loop_vars` and `args`, converts flows in `args` to TensorArrays
            # and packs it into the structure of `orig_loop_vars`.
            if maximum_iterations is None:
                return cond(*_pack_sequence_as(orig_loop_vars, args))
            else:
                return math_ops.logical_and(
                    loop_counter < maximum_iterations_arg,
                    cond(*_pack_sequence_as(orig_loop_vars, args)))

        # NOTE(skyewm): we set collections to the outer graph's collections for
        # compatibility with TPUEstimator.
        cond_graph = func_graph_module.func_graph_from_py_func(
            cond_name,
            wrapped_cond,
            [],  # We provide signature instead of args.
            {},
            signature=_build_signature(loop_vars, shape_invariants),
            func_graph=util.WhileCondFuncGraph(
                cond_name, collections=ops.get_default_graph()._collections),  # pylint: disable=protected-access
            add_control_dependencies=add_control_dependencies)

        def wrapped_body(loop_counter, maximum_iterations_arg, *args):
            """Loop body augmented with counter update.

            Args:
              loop_counter: Loop counter which needs to be incremented in the body.
              maximum_iterations_arg: Maximum iterations of the loop.
              *args: List of args

            Returns:
              A list of tensors the same length as args.
            """
            # Capture the tensors already captured in cond_graph so that they appear
            # in the same order in body_graph.external_captures.
            for t in cond_graph.external_captures:
                ops.get_default_graph().capture(t)

            # Convert the flow variables in `args` to TensorArrays. `args` should
            # already have the same structure as `orig_loop_vars` but currently there
            # is no nest.zip so we call `_pack_sequence_as` which flattens both
            # `orig_loop_vars` and `args`, converts flows in `args` to TensorArrays
            # and packs it into the structure of `orig_loop_vars`.
            outputs = body(*_pack_sequence_as(orig_loop_vars, args))
            if not nest.is_sequence(outputs):
                outputs = [outputs]
            # Compare the structure of input and output of body converting the
            # top-level tuples to list to be compatible with legacy while_loop.
            nest.assert_same_structure(list(outputs), list(orig_loop_vars))

            outputs = _tensor_array_to_flow(outputs)

            # TODO(srbs): Update lowering code to create _Enter nodes with
            # is_constant=True for inputs that are directly passed to outputs.
            return [loop_counter + 1, maximum_iterations_arg] + list(outputs)

        body_graph = func_graph_module.func_graph_from_py_func(
            body_name,
            wrapped_body,
            [],  # We provide signature instead of args.
            {},
            signature=_build_signature(loop_vars, shape_invariants),
            func_graph=util.WhileBodyFuncGraph(
                body_name, collections=ops.get_default_graph()._collections),  # pylint: disable=protected-access
            add_control_dependencies=add_control_dependencies)
        # Add external captures of body to the list of loop vars.
        # Note that external tensors will be treated as loop invariants, i.e.,
        # the value of that tensor in each iteration is the same as it was at the
        # beginning of the loop execution.
        loop_vars = loop_vars + body_graph.external_captures
        # TODO(srbs): Update lowering code to create _Enter nodes with
        # is_constant=True for inputs that are directly passed to outputs.
        body_graph.outputs.extend(body_graph.internal_captures)

        # Capture the extra `external_captures` of `body_graph` in `cond_graph` so
        # that it expects to receive those as arguments.
        with cond_graph.as_default():
            num_cond_captures = len(cond_graph.external_captures)
            assert (cond_graph.external_captures ==
                    body_graph.external_captures[:num_cond_captures])
            for body_capture in body_graph.external_captures[
                    num_cond_captures:]:
                assert body_capture not in cond_graph.captures
                cond_graph.capture(body_capture)

        # Make sure that the shapes of the loop outputs are compatible with the
        # shape invariants, or the shapes of the loop vars if the invariants are not
        # specified.
        num_flattened_outputs = len(nest.flatten(orig_loop_vars))
        # First var is loop counter and second var is maximum_iterations.
        first_loop_var_index = 2
        _check_shapes_compat(
            body_graph.outputs[first_loop_var_index:first_loop_var_index +
                               num_flattened_outputs],
            nest.flatten(
                shape_invariants[first_loop_var_index:first_loop_var_index +
                                 len_orig_loop_vars]),
            nest.flatten(loop_vars[first_loop_var_index:first_loop_var_index +
                                   len_orig_loop_vars]))
        flattened_loop_vars = nest.flatten(loop_vars)
        _check_num_inputs_outputs(cond_graph, body_graph,
                                  len(flattened_loop_vars))

        with ops.control_dependencies(
                list(cond_graph.control_captures) +
                list(body_graph.control_captures)):
            outputs = gen_functional_ops._while(
                flattened_loop_vars,
                util.create_new_tf_function(cond_graph),
                util.create_new_tf_function(body_graph),
                output_shapes=[t.shape for t in body_graph.outputs],
                parallel_iterations=parallel_iterations,
                name=scope)

        _copy_handle_data(body_graph.outputs, outputs)
        util.maybe_set_lowering_attr(outputs[0].op)
        util.maybe_propagate_compile_time_consts_in_xla(outputs[0].op)

        # Return identities for each output of the While op, rather than the output
        # of the While op directly. This makes pruning work if the output of
        # while_loop() is fetched: the lowering pass converts the While outputs into
        # IdentityN outputs, which if fetched will cause all ops in the body to be
        # run (since it takes all exit ops as input). After lowering, each output
        # identity op will end up with only the appropriate exit op as input.
        outputs = tuple(array_ops.identity(t) for t in outputs)

    outputs = _pack_sequence_as(
        orig_loop_vars, outputs[first_loop_var_index:first_loop_var_index +
                                num_flattened_outputs])

    if return_same_structure:
        return outputs

    flattened_outputs = nest.flatten(outputs)
    if len(flattened_outputs) == 1:
        return flattened_outputs[0]
    else:
        return outputs
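
A sketch exercising `shape_invariants` and `parallel_iterations` with this version: a loop variable whose leading dimension grows each iteration needs an invariant that leaves that dimension unknown. The names below are illustrative assumptions, under a v1 graph context.

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops

i0 = constant_op.constant(0)
v0 = constant_op.constant([0])
# `v` gains one element per iteration, so its invariant is [None].
i_final, v_final = while_loop(
    cond=lambda i, v: i < 5,
    body=lambda i, v: (i + 1, array_ops.concat([v, [i]], axis=0)),
    loop_vars=(i0, v0),
    shape_invariants=(tensor_shape.TensorShape([]),
                      tensor_shape.TensorShape([None])),
    parallel_iterations=1)
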
Example #10
def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
    """The gradient of a While op produced by while_loop."""
    body_graph = _get_body_graph(op)

    # Set the incoming gradient of TensorArray handles to None. The gradient
    # implementation currently assumes all resource tensors correspond to float32
    # ResourceVariables, which can lead to runtime shape errors when used with a
    # TensorArray. This is a workaround until TensorArrays are reimplemented with
    # TensorLists instead of resources.
    # Also set the incoming gradient of non-trainable inputs to None. It is
    # possible that we receive non-None gradients for non-trainable types in
    # nested while loops because we accumulate outputs of the inner while as
    # variant tensors which are trainable and hence receive zeros_like tensors in
    # the gradient pass. The non-trainable tensors then receive the popped zeros
    # tensor from this zeros variant. The gradient for the loop vars corresponding
    # to these tensors is None or zeros (this happens only if the loop var is
    # accumulated as well) in _grad_fn so we reset these.
    # TODO(b/118712257): Remove the IsTrainable filter once we can handle None
    # output grads in _grad_fn.
    grads = [
        None if _is_tensor_array_handle(output)
        or not gradients_impl.IsTrainable(output) else grad
        for grad, output in zip(grads, op.outputs)
    ]

    # Ensure that all non-resource trainable outputs have incoming gradients.
    assert all(g is not None or o.dtype == dtypes.resource
               or not gradients_impl.IsTrainable(o)
               for o, g in zip(op.outputs, grads)
               ), "All trainable loop vars must receive incoming gradients."
    # We compute the gradient for the sub-graph between trainable ys and xs
    # with non-None incoming gradients. We later pad the None's to the list of
    # outputs.
    ys, xs, non_none_grads = zip(
        *[(y, x, grad)
          for (y, x, grad) in zip(body_graph.outputs, body_graph.inputs, grads)
          if grad is not None])

    body_grad_graph, args = _create_grad_func(
        ys, xs, non_none_grads, body_graph,
        util.unique_grad_fn_name(body_graph.name), op)

    intermediate_tensors = _get_intermediates(body_grad_graph)

    maximum_iterations = op.get_attr(
        "_maximum_iterations") if _is_in_xla_context() else None
    assert not _is_in_xla_context() or maximum_iterations is not None
    for intermediate_tensor in intermediate_tensors:
        tensor_list = list_ops.empty_tensor_list(
            element_dtype=intermediate_tensor.dtype,
            element_shape=_get_tensor_convertible_shape(
                intermediate_tensor.shape),
            max_num_elements=maximum_iterations)

        with body_grad_graph.as_default():
            tensor_list_ph = body_grad_graph.capture(tensor_list,
                                                     whitelisted=True)
            # Push the intermediate tensor to the tensor list.
            appended_tensor_list = list_ops.tensor_list_push_back(
                tensor_list_ph, intermediate_tensor)
            # Add this modified tensor list to the list of outputs.
            body_grad_graph.outputs.append(appended_tensor_list)

    def grad_cond(counter, max_iters, *unused_args):
        return counter < max_iters

    loop_vars = args + body_grad_graph.external_captures
    grad_cond_name = util.unique_grad_fn_name(op.get_attr("cond").name)
    cond_grad_graph = func_graph_module.func_graph_from_py_func(
        grad_cond_name,
        grad_cond,
        loop_vars, {},
        func_graph=util.WhileCondFuncGraph(grad_cond_name))

    _check_num_inputs_outputs(cond_grad_graph, body_grad_graph, len(loop_vars))

    outputs = gen_functional_ops._while(
        loop_vars,
        util.create_new_tf_function(cond_grad_graph),
        util.create_new_tf_function(body_grad_graph),
        output_shapes=[t.shape for t in body_grad_graph.outputs],
        name="%s_grad" % op.name)

    _copy_handle_data(body_grad_graph.outputs, outputs)
    _maybe_set_lowering_attr(outputs[0].op)
    _maybe_set_maximum_iterations_attr(outputs[0].op, maximum_iterations)

    # Set None as the output gradient for tensors with None input gradient
    # e.g. TensorArray handles.
    # outputs[0] is the loop counter.
    # outputs[1] is the total number of loop iterations.
    index = 2
    none_padded_outputs = []
    for g in grads:
        if g is None:
            none_padded_outputs.append(None)
        else:
            none_padded_outputs.append(outputs[index])
            index += 1
    return none_padded_outputs
Example #11
def while_loop(cond,
               body,
               loop_vars,
               shape_invariants=None,
               maximum_iterations=None,
               name=None):
    """Like tf.while_loop, except emits a single While op."""
    maximum_iterations = _validate_and_convert_to_tensor(maximum_iterations)
    # Keep the original loop_vars around to know which args were TensorArrays.
    orig_loop_vars = loop_vars
    # Cache its length since we use it at multiple places below.
    len_orig_loop_vars = len(orig_loop_vars)

    # Convert TensorArrays to their flow variables. These get converted back to
    # TensorArrays before calling `cond` and `body`. See `wrapped_cond` and
    # `wrapped_body` below.
    loop_vars = list(_tensor_array_to_flow(orig_loop_vars))
    loop_vars = nest.map_structure(
        ops.internal_convert_to_tensor_or_indexed_slices, loop_vars)
    if shape_invariants is not None:
        nest.assert_same_structure(orig_loop_vars, shape_invariants)
    else:
        shape_invariants = nest.map_structure(lambda t: t.shape, loop_vars)

    if not name:
        name = "while"

    with ops.name_scope(name) as scope:
        with ops.name_scope(None):
            cond_name = util.unique_fn_name(scope, "cond")
            body_name = util.unique_fn_name(scope, "body")

        loop_counter = constant_op.constant(
            0,
            dtype=maximum_iterations.dtype
            if maximum_iterations is not None else None,
            name="loop_counter")
        # Add loop counter needed for computing gradients.
        loop_vars = [loop_counter] + loop_vars

        shape_invariants = [tensor_shape.scalar()] + shape_invariants

        # Automatic control dependencies are added in defuns, but not in v1
        # graphs. Propagate that behavior here.
        add_control_dependencies = util.in_defun()

        # Build a `cond` wrapper that can handle the extra counter loop_var.
        def wrapped_cond(loop_counter, *args):
            # Convert the flow variables in `args` to TensorArrays. `args` should
            # already have the same structure as `orig_loop_vars` but currently there
            # is no nest.zip so we call `_pack_sequence_as` which flattens both
            # `orig_loop_vars` and `args`, converts flows in `args` to TensorArrays
            # and packs it into the structure of `orig_loop_vars`.
            if maximum_iterations is None:
                return cond(*_pack_sequence_as(orig_loop_vars, args))
            else:
                return math_ops.logical_and(
                    loop_counter < maximum_iterations,
                    cond(*_pack_sequence_as(orig_loop_vars, args)))

        cond_graph = func_graph_module.func_graph_from_py_func(
            cond_name,
            wrapped_cond,
            loop_vars, {},
            signature=_build_signature(loop_vars, shape_invariants),
            func_graph=util.WhileCondFuncGraph(cond_name),
            add_control_dependencies=add_control_dependencies)

        # Add external_captures of cond to the list of loop vars.
        # Note that external tensors will be treated as loop invariants, i.e.,
        # the value of that tensor in each iteration is the same as it was at the
        # beginning of the loop execution.
        loop_vars = loop_vars + cond_graph.external_captures
        shape_invariants = shape_invariants + [
            t.shape for t in cond_graph.external_captures
        ]

        def wrapped_body(loop_counter, *args):
            """Loop body augmented with counter update.

            Args:
              loop_counter: Loop counter which needs to be incremented in the body.
              *args: List of args
                args[:len_orig_loop_vars] - Args for the original loop body.
                args[len_orig_loop_vars:] - External captures of cond. These get
                  passed through as is.

            Returns:
              A list of tensors the same length as args.
            """
            # Convert the flow variables in `args` to TensorArrays. `args` should
            # already have the same structure as `orig_loop_vars` but currently there
            # is no nest.zip so we call `_pack_sequence_as` which flattens both
            # `orig_loop_vars` and `args`, converts flows in `args` to TensorArrays
            # and packs it into the structure of `orig_loop_vars`.
            outputs = body(
                *_pack_sequence_as(orig_loop_vars, args[:len_orig_loop_vars]))
            if not nest.is_sequence(outputs):
                outputs = [outputs]
            # Compare the structure of input and output of body converting the
            # top-level tuples to list to be compatible with legacy while_loop.
            nest.assert_same_structure(list(outputs), list(orig_loop_vars))

            outputs = _tensor_array_to_flow(outputs)

            # Return the external_captures of cond_graph as is, i.e., treat them as
            # loop invariants.
            # TODO(srbs): Update lowering code to create _Enter nodes with
            # is_constant=True for inputs that are directly passed to outputs.
            return [loop_counter + 1] + list(outputs) + list(
                args[len_orig_loop_vars:])

        body_graph = func_graph_module.func_graph_from_py_func(
            body_name,
            wrapped_body,
            loop_vars, {},
            signature=_build_signature(loop_vars, shape_invariants),
            func_graph=util.WhileBodyFuncGraph(body_name),
            add_control_dependencies=add_control_dependencies)
        # Add external captures of body to the list of loop vars.
        # Note that external tensors will be treated as loop invariants, i.e.,
        # the value of that tensor in each iteration is the same as it was at the
        # beginning of the loop execution.
        loop_vars = loop_vars + body_graph.external_captures
        # TODO(srbs): Update lowering code to create _Enter nodes with
        # is_constant=True for inputs that are directly passed to outputs.
        body_graph.outputs.extend(body_graph.internal_captures)

        # Capture `external_captures` of `body_graph` in `cond_graph` so that it
        # expects to receive those as arguments.
        # TODO(b/118457764): Dedup tensors that are captured in both the cond and
        # body. This logic already exists in cond_v2.
        with cond_graph.as_default():
            for external_capture in body_graph.external_captures:
                assert external_capture not in cond_graph.captures, (
                    "Looks like both cond and body are capturing the same tensor %s. "
                    "This is not supported yet. For now consider passing,"
                    " this as a loop variable." % str(external_capture))
                cond_graph.capture(external_capture)

        # Export all tensors in the loop body that may be needed for gradient
        # computation. We do this by accumulating the intermediate values in
        # TensorLists.
        intermediate_tensors = _get_intermediates(body_graph)

        for intermediate_tensor in intermediate_tensors:
            tensor_list = list_ops.empty_tensor_list(
                element_dtype=intermediate_tensor.dtype,
                element_shape=_get_tensor_convertible_shape(
                    intermediate_tensor.shape),
                max_num_elements=maximum_iterations)
            loop_vars.append(tensor_list)
            with cond_graph.as_default():
                # Add a placeholder to cond_graph's inputs corresponding to the
                # tensor_list.
                cond_graph.capture(tensor_list)
            with body_graph.as_default():
                # Push the intermediate tensor to the tensor list. This captures the
                # `tensor_list` as well.
                appended_tensor_list = list_ops.tensor_list_push_back(
                    tensor_list, intermediate_tensor)
                # Add this modified tensor list to the list of outputs.
                body_graph.outputs.append(appended_tensor_list)

        # Make sure that the shapes of the loop outputs are compatible with the
        # shape invariants, or the shapes of the loop vars if the invariants are not
        # specified.
        num_flattened_outputs = len(nest.flatten(orig_loop_vars))
        _check_shapes_compat(
            body_graph.outputs[1:1 + num_flattened_outputs],
            nest.flatten(shape_invariants[1:1 + len_orig_loop_vars]),
            nest.flatten(loop_vars[1:1 + len_orig_loop_vars]))
        flattened_loop_vars = nest.flatten(loop_vars)
        _check_num_inputs_outputs(cond_graph, body_graph,
                                  len(flattened_loop_vars))

        outputs = gen_functional_ops._while(
            flattened_loop_vars,
            util.create_new_tf_function(cond_graph),
            util.create_new_tf_function(body_graph),
            output_shapes=[t.shape for t in body_graph.outputs],
            name=scope)

        _copy_handle_data(body_graph.outputs, outputs)
        _maybe_set_lowering_attr(outputs[0].op)
        _maybe_set_maximum_iterations_attr(outputs[0].op, maximum_iterations)

        # Return identities for each output of the While op, rather than the output
        # of the While op directly. This makes pruning work if the output of
        # while_loop() is fetched: the lowering pass converts the While outputs into
        # IdentityN outputs, which if fetched will cause all ops in the body to be
        # run (since it takes all exit ops as input). After lowering, each output
        # identity op will end up with only the appropriate exit op as input.
        outputs = tuple(array_ops.identity(t) for t in outputs)

    # First var is loop counter.
    if num_flattened_outputs == 1:
        return outputs[1]
    else:
        return _pack_sequence_as(orig_loop_vars,
                                 outputs[1:1 + num_flattened_outputs])
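
The _check_shapes_compat helper invoked above is defined elsewhere in this module and not shown in this listing. As a rough illustration of the check it is responsible for, here is a minimal sketch (hypothetical name, assuming only TensorShape compatibility needs to be verified):

# Illustrative sketch only -- not the actual _check_shapes_compat helper.
# Each loop output must remain compatible with its declared shape invariant
# (or with the input shape when no invariant was given).
from tensorflow.python.framework import tensor_shape


def check_shapes_compat_sketch(output_shapes, invariant_shapes, input_shapes):
  for out_shape, inv_shape, in_shape in zip(output_shapes, invariant_shapes,
                                            input_shapes):
    if not tensor_shape.TensorShape(inv_shape).is_compatible_with(out_shape):
      raise ValueError(
          "Loop output shape %s is not compatible with shape invariant %s "
          "(loop var shape was %s)." % (out_shape, inv_shape, in_shape))
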
Exemplo n.º 12
0
def while_loop(cond, body, loop_vars, shape_invariants=None, name=None):
    """Like tf.while_loop, except emits a single While op."""
    flattened_loop_vars = nest.flatten(loop_vars)
    if shape_invariants is not None:
        nest.assert_same_structure(loop_vars, shape_invariants)
        flattened_shapes = nest.flatten(shape_invariants)
    else:
        flattened_shapes = [t.shape for t in flattened_loop_vars]

    del shape_invariants

    if not name:
        name = "while"

    with ops.name_scope(name) as scope:
        with ops.name_scope(None):
            cond_name = util.unique_fn_name(scope, "cond")
            body_name = util.unique_fn_name(scope, "body")

        num_outputs = len(flattened_loop_vars)

        # Add loop counter needed for computing gradients.
        flattened_loop_vars = [constant_op.constant(0., name="loop_counter")
                               ] + flattened_loop_vars

        flattened_shapes = [tensor_shape.scalar()] + flattened_shapes

        # Automatic control dependencies are added in defuns, but not in v1
        # graphs. Propagate that behavior here.
        add_control_dependencies = util.in_defun()

        # Build a `cond` wrapper that can handle the extra counter loop_var.
        def wrapped_cond(unused_loop_counter, *loop_vars):
            return cond(*loop_vars)

        signature = [
            tensor_spec.TensorSpec(shape, t.dtype)
            for shape, t in zip(flattened_shapes, flattened_loop_vars)
        ]
        cond_graph = function.func_graph_from_py_func(
            cond_name,
            wrapped_cond,
            flattened_loop_vars, {},
            signature=signature,
            func_graph=util.WhileCondFuncGraph(cond_name),
            add_control_dependencies=add_control_dependencies)

        # Add external_captures of cond to the list of loop vars.
        # Note that external tensors will be treated as loop invariants, i.e.,
        # the value of that tensor in each iteration is the same as it was at the
        # beginning of the loop execution.
        flattened_loop_vars = flattened_loop_vars + cond_graph.external_captures
        flattened_shapes = flattened_shapes + [
            t.shape for t in cond_graph.external_captures
        ]

        def wrapped_body(loop_counter, *args):
            """Loop body augmented with counter update.

            Args:
              loop_counter: Loop counter which needs to be incremented in the body.
              *args: List of args
                args[:num_outputs] - Args for the original loop body.
                args[num_outputs:] - External captures of cond. These get passed
                  through as is.

            Returns:
              A list of tensors the same length as args.
            """
            outputs = body(*args[:num_outputs])
            if not isinstance(outputs, collections.Sequence):
                outputs = [outputs]

            # Return the external_captures of cond_graph as is, i.e., treat them as
            # loop invariants.
            # TODO(srbs): Update lowering code to create _Enter nodes with
            # is_constant=True for inputs that are directly passed to outputs.
            return [loop_counter + 1] + list(outputs) + list(
                args[num_outputs:])

        signature = [
            tensor_spec.TensorSpec(shape, t.dtype)
            for shape, t in zip(flattened_shapes, flattened_loop_vars)
        ]
        body_graph = function.func_graph_from_py_func(
            body_name,
            wrapped_body,
            flattened_loop_vars, {},
            signature=signature,
            func_graph=util.WhileBodyFuncGraph(body_name),
            add_control_dependencies=add_control_dependencies)
        # Add external captures of body to the list of loop vars.
        # Note that external tensors will be treated as loop invariants, i.e.,
        # the value of that tensor in each iteration is the same as it was at the
        # beginning of the loop execution.
        flattened_loop_vars = flattened_loop_vars + body_graph.external_captures
        # TODO(srbs): Update lowering code to create _Enter nodes with
        # is_constant=True for inputs that are directly passed to outputs.
        body_graph.outputs.extend(body_graph.internal_captures)

        # Capture `external_captures` of `body_graph` in `cond_graph` so that it
        # expects to receive those as arguments.
        # TODO(srbs): Dedup tensors that are captured in both the cond and body.
        # This logic already exists in cond_v2.
        with cond_graph.as_default():
            for external_capture in body_graph.external_captures:
                cond_graph.capture(external_capture)

        # Export all tensors in the loop body that may be needed for gradient
        # computation. We do this by accumulating the intermediate values in
        # TensorLists.
        intermediate_tensors = _get_intermediates(body_graph)

        for intermediate_tensor in intermediate_tensors:
            # TODO(srbs): Cache and re-use empty tensor lists.
            tensor_list = list_ops.empty_tensor_list(
                element_dtype=intermediate_tensor.dtype,
                element_shape=_get_tensor_convertible_shape(
                    intermediate_tensor.shape))
            flattened_loop_vars.append(tensor_list)
            with cond_graph.as_default():
                # Add a placeholder to cond_graph's inputs corresponding to the
                # tensor_list.
                cond_graph.capture(tensor_list)
            with body_graph.as_default():
                # Push the intermediate tensor to the tensor list. This captures the
                # `tensor_list` as well.
                appended_tensor_list = list_ops.tensor_list_push_back(
                    tensor_list, intermediate_tensor)
                # Add this modified tensor list to the list of outputs.
                body_graph.outputs.append(appended_tensor_list)

        # Make sure that the shapes of the loop outputs are compatible with the
        # shape invariants, or the shapes of the loop vars if the invariants are not
        # specified.
        _check_shapes_compat(body_graph.outputs[1:1 + num_outputs],
                             flattened_shapes[1:1 + num_outputs],
                             flattened_loop_vars[1:1 + num_outputs])
        outputs = gen_functional_ops._while(
            flattened_loop_vars,
            util.create_new_tf_function(cond_graph),
            util.create_new_tf_function(body_graph),
            output_shapes=[t.shape for t in body_graph.outputs],
            name=scope)

        _copy_handle_data(body_graph.outputs, outputs)
        _maybe_set_lowering_attr(outputs[0].op)

        # Return identities for each output of the While op, rather than the output
        # of the While op directly. This makes pruning work if the output of
        # while_loop() is fetched: the lowering pass converts the While outputs into
        # IdentityN outputs, which if fetched will cause all ops in the body to be
        # run (since it takes all exit ops as input). After lowering, each output
        # identity op will end up with only the appropriate exit op as input.
        outputs = tuple(array_ops.identity(t) for t in outputs)

    # First var is loop counter.
    if num_outputs == 1:
        return outputs[1]
    else:
        return nest.pack_sequence_as(loop_vars, outputs[1:1 + num_outputs])
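
A rough usage sketch for the while_loop defined above (not part of the original example; the imports are the internal TF modules the example itself relies on). It keeps doubling x until it reaches 100 and emits a single While op.

# Illustrative only: drive the while_loop above with a single loop variable.
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops

with ops.Graph().as_default():
  x = constant_op.constant(2.)
  # `result` is the final value of x after the loop terminates.
  result = while_loop(lambda x: math_ops.less(x, 100.),
                      lambda x: math_ops.multiply(x, 2.),
                      [x])
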
Exemplo n.º 13
0
def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
    """The gradient of a While op produced by while_loop."""
    body_graph = _get_body_graph(op)

    # Replace None gradients with zeros. This is needed because `grads` could have
    # None incoming gradients for the TensorLists. If we pass None's through, the
    # custom gradient of TensorListPopBack will create an EmptyTensorList inside
    # the FuncGraph which is undesirable.
    # TODO(b/80444525): There might be an issue with treating no gradient as zero
    # gradient in certain cases. Consider replacing None gradients with Zeros
    # for accumulators only.
    grads = [
        g if g is not None else array_ops.zeros_like(output)
        for g, output in zip(grads, op.outputs)
    ]

    body_grad_graph, args = _create_grad_func(
        body_graph, grads, util.unique_grad_fn_name(body_graph.name), op)

    intermediate_tensors = _get_intermediates(body_grad_graph)

    for intermediate_tensor in intermediate_tensors:
        tensor_list = list_ops.empty_tensor_list(
            element_dtype=intermediate_tensor.dtype,
            element_shape=_get_tensor_convertible_shape(
                intermediate_tensor.shape))
        with body_grad_graph.as_default():
            tensor_list_ph = body_grad_graph.capture(tensor_list,
                                                     whitelisted=True)
            # Push the intermediate tensor to the tensor list.
            appended_tensor_list = list_ops.tensor_list_push_back(
                tensor_list_ph, intermediate_tensor)
            # Add this modified tensor list to the list of outputs.
            body_grad_graph.outputs.append(appended_tensor_list)

    def grad_cond(counter, max_iters, *unused_args):
        return counter < max_iters

    loop_vars = args + body_grad_graph.external_captures
    grad_cond_name = util.unique_grad_fn_name(op.get_attr("cond").name)
    cond_grad_graph = function.func_graph_from_py_func(
        grad_cond_name,
        grad_cond,
        loop_vars, {},
        func_graph=util.WhileCondFuncGraph(grad_cond_name))

    assert len(loop_vars) == len(body_grad_graph.inputs)
    assert len(loop_vars) == len(body_grad_graph.outputs)
    assert len(loop_vars) == len(cond_grad_graph.inputs)

    outputs = gen_functional_ops._while(
        loop_vars,
        util.create_new_tf_function(cond_grad_graph),
        util.create_new_tf_function(body_grad_graph),
        output_shapes=[t.shape for t in body_grad_graph.outputs],
        name="%s_grad" % op.name)

    _copy_handle_data(body_grad_graph.outputs, outputs)
    _maybe_set_lowering_attr(outputs[0].op)

    # outputs[0] is the loop counter.
    # outputs[1] is the total number of loop iterations.
    return outputs[2:2 + len(op.inputs)]
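
In the library this gradient function is hooked up through gradient registration, so that taking gradients through a while_loop's outputs dispatches to it. A minimal sketch of that wiring (the op-type string is an assumption here):

# Illustrative only: register _WhileGrad as the gradient of the While op so
# that gradients taken through the loop outputs call
# _WhileGrad(op, *output_grads).
from tensorflow.python.framework import ops

ops.RegisterGradient("While")(_WhileGrad)
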
Exemplo n.º 14
0
def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
  """The gradient of a While op produced by while_loop."""
  cond_graph = _get_graph(op, "cond")
  body_graph = _get_graph(op, "body")
  orig_num_params = len(body_graph.outputs)

  maximum_iterations = op.get_attr(
      "_maximum_iterations") if _is_in_xla_context() else None
  assert not _is_in_xla_context() or maximum_iterations is not None

  # Set the incoming gradient of non-trainable inputs to None. It is possible
  # that we receive non-None gradients for non-trainable types in nested while
  # loops because we accumulate outputs of the inner while as variant tensors
  # which are trainable and hence receive zeros_like tensors in the gradient
  # pass. The non-trainable tensors then receive the popped zeros tensor from
  # this zeros variant. The gradient for the loop vars corresponding to these
  # tensors is None or zeros (this happens only if the loop var is accumulated
  # as well) in _grad_fn so we reset these.
  # TODO(b/118712257): Remove the IsTrainable filter once we can handle None
  # output grads in _grad_fn.
  grads = [
      None if not _is_trainable(output) else grad
      for grad, output in zip(grads, body_graph.outputs)
  ]

  # Ensure that all non-resource trainable outputs have incoming gradients.
  assert all(g is not None or o.dtype == dtypes.resource or not _is_trainable(o)
             for o, g in zip(body_graph.outputs, grads)
            ), "All trainable loop vars must receive incoming gradients."
  # We compute the gradient for the sub-graph between trainable ys and xs
  # with non-None incoming gradients. We later pad the None's to the list of
  # outputs.
  ys, xs, non_none_grads = zip(*[(y, x, grad) for (y, x, grad) in zip(
      body_graph.outputs, body_graph.inputs, grads) if grad is not None])

  body_grad_graph, args = _create_grad_func(
      ys, xs, non_none_grads, cond_graph, body_graph,
      util.unique_grad_fn_name(body_graph.name), op, maximum_iterations)

  if body_grad_graph.while_op_needs_rewrite:
    # Modify 'op' to output the intermediate accumulators needed by the grad
    # function.
    # NOTE(skyewm): if there are any active sessions, this modification to `op`
    # may make them unrunnable!

    cond_graph.name += "_rewritten"
    body_graph.name += "_rewritten"

    new_inputs = body_grad_graph.empty_tensor_lists
    new_outputs = body_graph.outputs[orig_num_params:]

    op._set_func_attr("cond", util.create_new_tf_function(cond_graph))
    op._set_func_attr("body", util.create_new_tf_function(body_graph))
    op._set_type_list_attr("T", body_graph.output_types)
    op._set_shape_list_attr("output_shapes", body_graph.output_shapes)
    op._add_while_inputs(new_inputs)
    op._add_outputs([t.dtype for t in new_outputs],
                    [t.shape for t in new_outputs])
    _copy_handle_data(new_outputs, op.outputs[orig_num_params:])

  captured_inputs = _resolve_grad_captures(body_graph, body_grad_graph, op)
  loop_vars = args + captured_inputs

  def grad_cond(counter, max_iters, *unused_args):
    return counter < max_iters

  grad_cond_name = util.unique_grad_fn_name(op.get_attr("cond").name)
  cond_grad_graph = func_graph_module.func_graph_from_py_func(
      grad_cond_name, grad_cond, loop_vars, {},
      func_graph=util.WhileCondFuncGraph(grad_cond_name))

  _check_num_inputs_outputs(cond_grad_graph, body_grad_graph, len(loop_vars))

  outputs = gen_functional_ops._while(
      loop_vars,
      util.create_new_tf_function(cond_grad_graph),
      util.create_new_tf_function(body_grad_graph),
      output_shapes=[t.shape for t in body_grad_graph.outputs],
      name="%s_grad" % op.name)

  _copy_handle_data(body_grad_graph.outputs, outputs)
  util.maybe_set_lowering_attr(outputs[0].op)
  _maybe_set_maximum_iterations_attr(outputs[0].op, maximum_iterations)

  # See comment in while_loop.
  outputs = [array_ops.identity(t) for t in outputs]

  # Set None as the output gradient for tensors with None input gradient.
  # outputs[0] is the loop counter.
  # outputs[1] is the total number of loop iterations.
  index = 2
  none_padded_outputs = []
  for g in grads:
    if g is None:
      none_padded_outputs.append(None)
    else:
      none_padded_outputs.append(outputs[index])
      index += 1
  return none_padded_outputs
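
The None-padding step at the end can be read on its own; a standalone sketch (hypothetical helper name) of the same bookkeeping:

# Sketch of the None-padding above: gradients that were filtered out before
# building the gradient While op are re-inserted as None so the result lines
# up with the forward op's inputs. start_index=2 skips the loop counter and
# the iteration count.
def pad_none_grads_sketch(grad_while_outputs, incoming_grads, start_index=2):
  index = start_index
  padded = []
  for g in incoming_grads:
    if g is None:
      padded.append(None)
    else:
      padded.append(grad_while_outputs[index])
      index += 1
  return padded


# The middle loop var had no incoming gradient, so it stays None.
assert pad_none_grads_sketch(
    ["counter", "iters", "dx", "dz"], [1.0, None, 2.0]) == ["dx", None, "dz"]
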
Exemplo n.º 15
0
def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
  """The gradient of a While op produced by while_loop."""
  # Note that op is not always the same as while_op because the gradient tape,
  # for eager mode compatibility, forgets information about the proper op. Since
  # the loop cannot run in eager mode, however, we can safely introspect into
  # the graph here.
  while_op = op.outputs[0].op
  cond_graph = _get_graph(while_op, "cond")
  body_graph = _get_graph(while_op, "body")
  orig_num_params = len(body_graph.outputs)

  maximum_iterations = op.get_attr(
      "_maximum_iterations") if _is_in_xla_context() else None
  parallel_iterations = op.get_attr("parallel_iterations")
  assert not _is_in_xla_context() or maximum_iterations is not None
  maximum_iterations = _validate_and_convert_to_tensor(maximum_iterations)

  grads = [_preprocess_grad(grad, body_out, while_out)
           for grad, body_out, while_out
           in zip(grads, body_graph.outputs, while_op.outputs)]

  # We compute the gradient for the sub-graph between trainable ys and xs
  # with non-None incoming gradients. We later pad the None's to the list of
  # outputs.
  ys, xs, non_none_grads = zip(*[(y, x, grad) for (y, x, grad) in zip(
      body_graph.outputs, body_graph.inputs, grads) if grad is not None])

  body_grad_graph, args = _create_grad_func(
      ys, xs, non_none_grads, cond_graph, body_graph,
      util.unique_grad_fn_name(body_graph.name), op, maximum_iterations)

  if body_grad_graph.while_op_needs_rewrite:
    # Modify 'op' to output the intermediate accumulators needed by the grad
    # function.
    # NOTE(skyewm): if there are any active sessions, this modification to `op`
    # may make them unrunnable!

    cond_graph.name += "_rewritten"
    body_graph.name += "_rewritten"

    new_inputs = body_grad_graph.empty_tensor_lists
    new_outputs = body_graph.outputs[orig_num_params:]

    while_op._set_func_attr("cond", util.create_new_tf_function(cond_graph))
    while_op._set_func_attr("body", util.create_new_tf_function(body_graph))
    while_op._set_type_list_attr("T", body_graph.output_types)
    while_op._set_shape_list_attr("output_shapes", body_graph.output_shapes)
    while_op._add_while_inputs(new_inputs)
    while_op._add_outputs([t.dtype for t in new_outputs],
                          [t.shape for t in new_outputs])
    _copy_handle_data(new_outputs, op.outputs[orig_num_params:])

  captured_inputs = _resolve_grad_captures(body_graph, body_grad_graph,
                                           while_op)
  loop_vars = args + captured_inputs

  # This modifies body_grad_graph.
  loop_vars = while_v2_indexed_slices_rewriter.rewrite_grad_indexed_slices(
      grads, body_grad_graph, loop_vars, while_op.inputs)

  def grad_cond(counter, max_iters, *unused_args):
    return counter < max_iters

  grad_cond_name = util.unique_grad_fn_name(op.get_attr("cond").name)
  cond_grad_graph = func_graph_module.func_graph_from_py_func(
      grad_cond_name, grad_cond, loop_vars, {},
      func_graph=util.WhileCondFuncGraph(grad_cond_name))

  _check_num_inputs_outputs(cond_grad_graph, body_grad_graph, len(loop_vars))

  outputs = gen_functional_ops._while(
      loop_vars,
      util.create_new_tf_function(cond_grad_graph),
      util.create_new_tf_function(body_grad_graph),
      output_shapes=[t.shape for t in body_grad_graph.outputs],
      parallel_iterations=parallel_iterations,
      name="%s_grad" % while_op.name)
  grad_op = outputs[0].op

  _copy_handle_data(body_grad_graph.outputs, outputs)
  util.maybe_set_lowering_attr(grad_op)
  _maybe_set_maximum_iterations_attr(grad_op, maximum_iterations)

  # See comment in while_loop.
  outputs = [array_ops.identity(t) for t in outputs]
  return _get_structured_grad_output(outputs, grads, body_grad_graph)
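
The indexed-slices rewrite above is needed because gradients of gather-style ops arrive as IndexedSlices rather than dense tensors. A small graph-mode sketch of where such a gradient comes from (illustrative only, using the TF1-style public API for brevity):

# Illustrative only: the gradient of tf.gather with respect to its params is
# an IndexedSlices, which is the case the rewriter has to handle when it
# flows through a While op.
import tensorflow.compat.v1 as tf

with tf.Graph().as_default():
  params = tf.ones([10, 3])
  picked = tf.gather(params, [1, 4])
  grad, = tf.gradients(tf.reduce_sum(picked), [params])
  print(type(grad))  # an IndexedSlices, not a dense Tensor
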
Exemplo n.º 16
0
def While(input_, cond, body, name=None, hostmem=None):
    r"""output = input; While (Cond(output)) { output = Body(output) }.

  Args:
    input_: A list of `Tensor` objects.
      A list of input tensors whose types are T.
    cond: A function that takes 'input' and returns a tensor. If the tensor
      is a non-boolean scalar, it is converted to a boolean according to the
      following rule: if the scalar is a numerical value, non-zero means True
      and zero means False; if the scalar is a string, non-empty means True
      and empty means False. If the tensor is not a scalar, non-emptiness
      means True and emptiness means False.
    body: A function that takes a list of tensors and returns another list of
      tensors. Both lists have the same types as specified by T.
    name: A name for the operation (optional).
    hostmem: A list of integers. If i is in the list, input[i] is a
      host memory tensor.

  Raises:
    ValueError: if `cond` has implicitly captured inputs or if `cond` and `body`
      have different signatures.

  Returns:
    A list of `Tensor` objects. Has the same type as `input`.
    A list of output tensors whose types are T.
  """
    if cond.captured_inputs:
        raise ValueError("While op 'cond' argument must be a function "
                         "without implicitly captured inputs.")

    if cond.declared_input_types != body.declared_input_types:
        raise ValueError(
            "While op 'cond' and 'body' signatures do not match. %r vs %r" %
            (cond.declared_input_types, body.declared_input_types))

    if body.captured_inputs:
        cond_dtypes = list(body.declared_input_types) + [
            t.dtype for t in body.captured_inputs
        ]

        @function.Defun(*cond_dtypes, func_name="%s_Wrapper" % cond.name)
        def CondWrapper(*args):
            """A wrapper that handles loop-carried captured inputs."""
            return cond(*args[:len(body.declared_input_types)])

        ret = gen_functional_ops._while(input_ + body.captured_inputs,
                                        CondWrapper,
                                        _LoopBodyCaptureWrapper(body),
                                        name=name)
        # Slice off the loop-carried captured inputs.
        ret = ret[:-len(body.captured_inputs)]
    else:
        ret = gen_functional_ops._while(input_, cond, body, name=name)
    if hostmem:
        input_attr = attr_value_pb2.AttrValue()
        input_attr.list.i.extend(hostmem)
        ret[0].op._set_attr("_input_hostmem", input_attr)  # pylint: disable=protected-access

        output_attr = attr_value_pb2.AttrValue()
        output_attr.list.i.extend(hostmem)
        ret[0].op._set_attr("_output_hostmem", output_attr)  # pylint: disable=protected-access
    return ret
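
A rough usage sketch for the While wrapper above (not part of the original example): cond and body are declared as Defuns with identical signatures, as the wrapper requires.

# Illustrative only: count a scalar int32 from 0 up to 10 with the functional
# While wrapper defined above.
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import function
from tensorflow.python.framework import ops


@function.Defun(dtypes.int32)
def ExampleCond(x):
  return x < 10


@function.Defun(dtypes.int32)
def ExampleBody(x):
  return x + 1


with ops.Graph().as_default():
  (counter,) = While([constant_op.constant(0)],
                     cond=ExampleCond, body=ExampleBody)
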
Exemplo n.º 17
0
def while_loop(cond,
               body,
               loop_vars,
               shape_invariants=None,
               parallel_iterations=10,
               maximum_iterations=None,
               name=None,
               return_same_structure=True):
  """Like tf.while_loop, except emits a single While op."""
  maximum_iterations = _validate_and_convert_to_tensor(maximum_iterations)
  # Keep the original loop_vars around to know which args were TensorArrays.
  orig_loop_vars = loop_vars
  # Cache its length since we use it at multiple places below.
  len_orig_loop_vars = len(orig_loop_vars)

  # Convert TensorArrays to their flow variables. These get converted back to
  # TensorArrays before calling `cond` and `body`. See `wrapped_cond` and
  # `wrapped_body` below.
  loop_vars = list(_tensor_array_to_flow(orig_loop_vars))
  loop_vars = nest.map_structure(
      ops.internal_convert_to_tensor_or_indexed_slices, loop_vars)
  if shape_invariants is not None:
    nest.assert_same_structure(orig_loop_vars, shape_invariants)
  else:
    shape_invariants = nest.map_structure(lambda t: t.shape, loop_vars)

  if not name:
    name = "while"

  with ops.name_scope(name) as scope:
    with ops.name_scope(None):
      cond_name = util.unique_fn_name(scope, "cond")
      body_name = util.unique_fn_name(scope, "body")

    loop_counter = constant_op.constant(
        0,
        dtype=maximum_iterations.dtype
        if maximum_iterations is not None else None,
        name="loop_counter")
    # Add loop counter needed for computing gradients.
    loop_vars = [loop_counter] + loop_vars

    shape_invariants = type(shape_invariants)([tensor_shape.scalar()
                                              ]) + shape_invariants

    # Automatic control dependencies are added in defuns, but not in v1
    # graphs. Propagate that behavior here.
    add_control_dependencies = ops.get_default_graph()._add_control_dependencies

    # Build a `cond` wrapper that can handle the extra counter loop_var.
    def wrapped_cond(loop_counter, *args):
      # Convert the flow variables in `args` to TensorArrays. `args` should
      # already have the same structure as `orig_loop_vars` but currently there
      # is no nest.zip so we call `_pack_sequence_as` which flattens both
      # `orig_loop_vars` and `args`, converts flows in `args` to TensorArrays
      # and packs it into the structure of `orig_loop_vars`.
      if maximum_iterations is None:
        return cond(*_pack_sequence_as(orig_loop_vars, args))
      else:
        return math_ops.logical_and(
            loop_counter < maximum_iterations,
            cond(*_pack_sequence_as(orig_loop_vars, args)))

    # NOTE(skyewm): we set collections to the outer graph's collections for
    # compatibility with TPUEstimator.
    cond_graph = func_graph_module.func_graph_from_py_func(
        cond_name,
        wrapped_cond,
        [],  # We provide signature instead of args.
        {},
        signature=_build_signature(loop_vars, shape_invariants),
        func_graph=util.WhileCondFuncGraph(
            cond_name, collections=ops.get_default_graph()._collections),  # pylint: disable=protected-access
        add_control_dependencies=add_control_dependencies)

    def wrapped_body(loop_counter, *args):
      """Loop body augmented with counter update.

      Args:
        loop_counter: Loop counter which needs to be incremented in the body.
        *args: List of args

      Returns:
        A list of tensors the same length as args.
      """
      # Capture the tensors already captured in cond_graph so that they appear
      # in the same order in body_graph.external_captures.
      for t in cond_graph.external_captures:
        ops.get_default_graph().capture(t)

      # Convert the flow variables in `args` to TensorArrays. `args` should
      # already have the same structure as `orig_loop_vars` but currently there
      # is no nest.zip so we call `_pack_sequence_as` which flattens both
      # `orig_loop_vars` and `args`, converts flows in `args` to TensorArrays
      # and packs it into the structure of `orig_loop_vars`.
      outputs = body(*_pack_sequence_as(orig_loop_vars, args))
      if not nest.is_sequence(outputs):
        outputs = [outputs]
      # Compare the structure of input and output of body converting the
      # top-level tuples to list to be compatible with legacy while_loop.
      nest.assert_same_structure(list(outputs), list(orig_loop_vars))

      outputs = _tensor_array_to_flow(outputs)

      # TODO(srbs): Update lowering code to create _Enter nodes with
      # is_constant=True for inputs that are directly passed to outputs.
      return [loop_counter + 1] + list(outputs)

    body_graph = func_graph_module.func_graph_from_py_func(
        body_name,
        wrapped_body,
        [],  # We provide signature instead of args.
        {},
        signature=_build_signature(loop_vars, shape_invariants),
        func_graph=util.WhileBodyFuncGraph(
            body_name, collections=ops.get_default_graph()._collections),  # pylint: disable=protected-access
        add_control_dependencies=add_control_dependencies)
    # Add external captures of body to the list of loop vars.
    # Note that external tensors will be treated as loop invariants, i.e.,
    # the value of that tensor in each iteration is the same as it was at the
    # beginning of the loop execution.
    loop_vars = loop_vars + body_graph.external_captures
    # TODO(srbs): Update lowering code to create _Enter nodes with
    # is_constant=True for inputs that are directly passed to outputs.
    body_graph.outputs.extend(body_graph.internal_captures)

    # Capture the extra `external_captures` of `body_graph` in `cond_graph` so
    # that it expects to receive those as arguments.
    with cond_graph.as_default():
      num_cond_captures = len(cond_graph.external_captures)
      assert (cond_graph.external_captures ==
              body_graph.external_captures[:num_cond_captures])
      for body_capture in body_graph.external_captures[num_cond_captures:]:
        assert body_capture not in cond_graph.captures
        cond_graph.capture(body_capture)

    # Make sure that the shapes of the loop outputs are compatible with the
    # shape invariants, or the shapes of the loop vars if the invariants are not
    # specified.
    num_flattened_outputs = len(nest.flatten(orig_loop_vars))
    _check_shapes_compat(
        body_graph.outputs[1:1 + num_flattened_outputs],
        nest.flatten(shape_invariants[1:1 + len_orig_loop_vars]),
        nest.flatten(loop_vars[1:1 + len_orig_loop_vars]))
    flattened_loop_vars = nest.flatten(loop_vars)
    _check_num_inputs_outputs(cond_graph, body_graph,
                              len(flattened_loop_vars))

    outputs = gen_functional_ops._while(
        flattened_loop_vars,
        util.create_new_tf_function(cond_graph),
        util.create_new_tf_function(body_graph),
        output_shapes=[t.shape for t in body_graph.outputs],
        parallel_iterations=parallel_iterations,
        name=scope)

    _copy_handle_data(body_graph.outputs, outputs)
    util.maybe_set_lowering_attr(outputs[0].op)
    _maybe_set_maximum_iterations_attr(outputs[0].op, maximum_iterations)

    # Return identities for each output of the While op, rather than the output
    # of the While op directly. This makes pruning work if the output of
    # while_loop() is fetched: the lowering pass converts the While outputs into
    # IdentityN outputs, which if fetched will cause all ops in the body to be
    # run (since it takes all exit ops as input). After lowering, each output
    # identity op will end up with only the appropriate exit op as input.
    outputs = tuple(array_ops.identity(t) for t in outputs)

  # First var is loop counter.
  outputs = _pack_sequence_as(orig_loop_vars,
                              outputs[1:1 + num_flattened_outputs])

  if return_same_structure:
    return outputs

  flattened_outputs = nest.flatten(outputs)
  if len(flattened_outputs) == 1:
    return flattened_outputs[0]
  else:
    return outputs
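
A rough usage sketch for the while_loop variant above (not part of the original example): a two-variable loop with an iteration cap. Since return_same_structure defaults to True, the result mirrors the (i, acc) structure.

# Illustrative only: sum 0..9 with an explicit maximum_iterations cap.
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops

with ops.Graph().as_default():
  i = constant_op.constant(0)
  acc = constant_op.constant(0)
  final_i, final_acc = while_loop(
      cond=lambda i, acc: math_ops.less(i, 100),
      body=lambda i, acc: (i + 1, acc + i),
      loop_vars=(i, acc),
      maximum_iterations=10)  # the cap, not the cond, ends this loop
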
Exemplo n.º 18
0
def while_loop(cond, body, loop_vars, name=None):
    """Like tf.while_loop, except emits a single While op."""
    if not name:
        name = "while"

    with ops.name_scope(name) as scope:
        with ops.name_scope(None):
            cond_name = _get_unique_name(("%scond" % scope).replace("/", "_"))
            body_name = _get_unique_name(("%sbody" % scope).replace("/", "_"))

        flattened_loop_vars = nest.flatten(loop_vars)
        num_outputs = len(flattened_loop_vars)

        # Add loop counter needed for computing gradients.
        flattened_loop_vars = [constant_op.constant(0., name="loop_counter")
                               ] + flattened_loop_vars

        # Build a `cond` wrapper that can handle the extra counter loop_var.
        def wrapped_cond(unused_loop_counter, *loop_vars):
            return cond(*loop_vars)

        cond_graph = function.func_graph_from_py_func(cond_name, wrapped_cond,
                                                      flattened_loop_vars, {})

        # Add external_captures of cond to the list of loop vars.
        # Note that external tensors will be treated as loop invariants, i.e.,
        # the value of that tensor in each iteration is the same as it was at the
        # beginning of the loop execution.
        flattened_loop_vars = flattened_loop_vars + cond_graph.external_captures

        def wrapped_body(loop_counter, *args):
            """Loop body augmented with counter update.

            Args:
              loop_counter: Loop counter which needs to be incremented in the body.
              *args: List of args
                args[:num_outputs] - Args for the original loop body.
                args[num_outputs:] - External captures of cond. These get passed
                  through as is.

            Returns:
              A list of tensors the same length as args.
            """
            outputs = body(*args[:num_outputs])
            if not isinstance(outputs, collections.Sequence):
                outputs = [outputs]

            # Return the external_captures of cond_graph as is, i.e., treat them as
            # loop invariants.
            # TODO(srbs): Update lowering code to create _Enter nodes with
            # is_constant=True for inputs that are directly passed to outputs.
            return [loop_counter + 1] + list(outputs) + list(
                args[num_outputs:])

        body_graph = function.func_graph_from_py_func(body_name, wrapped_body,
                                                      flattened_loop_vars, {})
        # Add external captures of body to the list of loop vars.
        # Note that external tensors will be treated as loop invariants, i.e.,
        # the value of that tensor in each iteration is the same as it was at the
        # beginning of the loop execution.
        flattened_loop_vars = flattened_loop_vars + body_graph.external_captures
        # TODO(srbs): Update lowering code to create _Enter nodes with
        # is_constant=True for inputs that are directly passed to outputs.
        body_graph.outputs.extend(body_graph.internal_captures)

        # Capture `external_captures` of `body_graph` in `cond_graph` so that it
        # expects to receive those as arguments.
        # TODO(srbs): Dedup tensors that are captured in both the cond and body.
        # This logic already exists in cond_v2.
        with cond_graph.as_default():
            for external_capture in body_graph.external_captures:
                cond_graph.capture(external_capture)

        # Export all tensors in the loop body that may be needed for gradient
        # computation. We do this by accumulating the intermediate values in
        # TensorLists.
        intermediate_tensors = _get_intermediates(body_graph)

        for intermediate_tensor in intermediate_tensors:
            # TODO(srbs): Cache and re-use empty tensor lists.
            tensor_list = list_ops.empty_tensor_list(
                element_dtype=intermediate_tensor.dtype,
                element_shape=_get_tensor_convertible_shape(
                    intermediate_tensor.shape))
            flattened_loop_vars.append(tensor_list)
            with cond_graph.as_default():
                # Add a placeholder to cond_graph's inputs corresponding to the
                # tensor_list.
                cond_graph.capture(tensor_list)
            with body_graph.as_default():
                # Push the intermediate tensor to the tensor list. This captures the
                # `tensor_list` as well.
                appended_tensor_list = list_ops.tensor_list_push_back(
                    tensor_list, intermediate_tensor)
                # Add this modified tensor list to the list of outputs.
                body_graph.outputs.append(appended_tensor_list)

        outputs = gen_functional_ops._while(
            flattened_loop_vars,
            cond_v2._create_new_tf_function(cond_graph),
            cond_v2._create_new_tf_function(body_graph),
            name=scope)

        _copy_handle_data(body_graph.outputs, outputs)
        _maybe_set_lowering_attr(outputs[0].op)

    # First var is loop counter.
    if num_outputs == 1:
        return outputs[1]
    else:
        return nest.pack_sequence_as(loop_vars, outputs[1:1 + num_outputs])
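
Several of the examples above share the same accumulator pattern: an initially empty TensorList that each iteration pushes an intermediate value onto, with the gradient pass later popping those values back off. A standalone sketch of that round trip using the same list_ops calls (illustrative only):

# Illustrative only: push a few values onto an empty TensorList and stack
# them back into a dense tensor (the gradient code pops them off instead,
# one element per iteration).
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import list_ops

with ops.Graph().as_default():
  acc = list_ops.empty_tensor_list(element_dtype=dtypes.float32,
                                   element_shape=[])
  for step_value in (1.0, 2.0, 3.0):
    acc = list_ops.tensor_list_push_back(acc, constant_op.constant(step_value))
  stacked = list_ops.tensor_list_stack(acc, element_dtype=dtypes.float32)
  # `stacked` evaluates to [1.0, 2.0, 3.0] when run in a session.
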