def set_weights(distribution_strategy, dist_model, weights):
  """Sets the weights of the replicated models.

  The weights of the replicated models are set to the weights of the original
  model. The weights of the replicated models are Mirrored variables, and hence
  we need to use the `update` call within a DistributionStrategy scope.

  Args:
    distribution_strategy: DistributionStrategy used to distribute training
        and validation.
    dist_model: The replicated models on the different devices.
    weights: The weights of the original model.
  """
  assign_ops = []
  for layer in dist_model.layers:
    num_param = len(layer.weights)
    layer_weights = weights[:num_param]
    for sw, w in zip(layer.weights, layer_weights):
      if ops.executing_eagerly_outside_functions():
        sw.assign(w)
      else:
        assign_ops.append(distribution_strategy.unwrap(sw.assign(w)))
    weights = weights[num_param:]

  if not ops.executing_eagerly_outside_functions():
    K.get_session(assign_ops).run(assign_ops)
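
The branch on `ops.executing_eagerly_outside_functions()` above is the recurring pattern throughout these examples: assign eagerly when possible, otherwise collect the assign ops and run them in a session. Below is a minimal sketch of that pattern; the helper name is illustrative, and the imports assume the TensorFlow-internal modules the snippet refers to as `ops` and `K`.

from tensorflow.python.framework import ops
from tensorflow.python.keras import backend as K

def _assign_value(variable, value):
  """Illustrative helper mirroring the eager/graph split in set_weights()."""
  if ops.executing_eagerly_outside_functions():
    variable.assign(value)  # Eager: the assignment takes effect immediately.
  else:
    # Graph mode: build the assign op, then run it in the Keras session.
    assign_op = variable.assign(value)
    K.get_session([assign_op]).run(assign_op)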
Example No. 2
def canonicalize(d, default=None):
  """Canonicalize device string.

  If d has missing components, the rest are deduced from the `default`
  argument or from '/replica:0/task:0/device:CPU:0'. For example:
    If d = '/cpu:0', default='/job:worker/task:1', it returns
      '/job:worker/replica:0/task:1/device:CPU:0'.
    If d = '/cpu:0', default='/job:worker', it returns
      '/job:worker/replica:0/task:0/device:CPU:0'.
    If d = '/gpu:0', default=None, it returns
      '/replica:0/task:0/device:GPU:0'.

  Note: This uses "job:localhost" as the default if executing eagerly.

  Args:
    d: a device string.
    default: a string for default device if d doesn't have all components.

  Returns:
    a canonicalized device string.
  """
  d = tf_device.DeviceSpec.from_string(d)
  assert d.device_type is None or d.device_type == d.device_type.upper(), (
      "Device type '%s' must be all-caps." % (d.device_type,))
  # Fill in missing device fields using defaults.
  result = tf_device.DeviceSpec(
      replica=0, task=0, device_type="CPU", device_index=0)
  if ops.executing_eagerly_outside_functions():
    result.job = "localhost"
  if default:
    result.merge_from(tf_device.DeviceSpec.from_string(default))
  result.merge_from(d)
  return result.to_string()
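
For reference, a short usage sketch whose expected outputs are the ones documented in the docstring above (not re-verified here):

canonicalize('/cpu:0', default='/job:worker/task:1')
# -> '/job:worker/replica:0/task:1/device:CPU:0'
canonicalize('/gpu:0')
# -> '/replica:0/task:0/device:GPU:0' (plus 'job:localhost' when executing eagerly)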
Example No. 3
  def __init__(self, fn_graph, variable_holder, attrs=None, signature=None):
    super(WrappedFunction, self).__init__(
        fn_graph, attrs=attrs, signature=signature)
    self._variable_holder = variable_holder
    if ops.executing_eagerly_outside_functions():
      # TODO(allenl): Make this work in 1.x?
      self._lift_unlifted_variables()
Example No. 4
  def apply_gradients(self, grads_and_vars, name=None):
    """Apply gradients to variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        `compute_gradients()`.
      name: Optional name for the returned operation. Defaults to the name
        passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.

    Raises:
      TypeError: If `grads_and_vars` is malformed.
      ValueError: If none of the variables have gradients.
    """
    grads_and_vars = _filter_grads(grads_and_vars)
    var_list = [v for (_, v) in grads_and_vars]
    if distribution_strategy_context.has_distribution_strategy():
      reduced_grads = merge_grads(grads_and_vars)
      grads_and_vars = zip(reduced_grads, var_list)

    with ops.init_scope():
      self._prepare()
      self._create_slots(var_list)
    update_ops = []

    def update_grad_to_var(grad, var):
      """Apply gradient to variable."""
      if isinstance(var, ops.Tensor):
        raise NotImplementedError("Trying to update a Tensor ", var)
      if isinstance(grad, ops.IndexedSlices):
        if var.constraint is not None:
          raise RuntimeError(
              "Cannot use a constraint function on a sparse variable.")
        return self._resource_apply_sparse_duplicate_indices(
            grad.values, var, grad.indices)
      update_op = self._resource_apply_dense(grad, var)
      if var.constraint is not None:
        with ops.control_dependencies([update_op]):
          return var.assign(var.constraint(var))
      else:
        return update_op

    with ops.name_scope(name, self._name) as name:
      for grad, var in grads_and_vars:
        scope_name = ("" if ops.executing_eagerly_outside_functions() else
                      "_" + var.op.name)
        with ops.name_scope("update" + scope_name):
          update_ops.append(update_grad_to_var(grad, var))
      # Control dependencies do not work in per-replica mode; change this once
      # b/118841692 is fixed.
      # with ops.control_dependencies(update_ops):
      #   apply_updates = self._iterations.assign_add(1).op
      apply_updates = merge_update_step(update_ops, self.iterations)
      return apply_updates
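
A hedged usage sketch for `apply_gradients`; the `compute_gradients` call and the `loss`/`var_list` names come from the docstring, not from this snippet:

# grads_and_vars is the documented list of (gradient, variable) pairs.
grads_and_vars = optimizer.compute_gradients(loss, var_list)
train_op = optimizer.apply_gradients(grads_and_vars, name='train_step')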
Example No. 5
  def call(self, inputs, mask=None, training=None, initial_state=None):
    if isinstance(inputs, list):
      initial_state = inputs[1:]
      inputs = inputs[0]
    elif initial_state is not None:
      pass
    elif self.stateful:
      initial_state = self.states
    else:
      initial_state = self.get_initial_state(inputs)

    if len(initial_state) != len(self.states):
      raise ValueError('Layer has ' + str(len(self.states)) +
                       ' states but was passed ' + str(len(initial_state)) +
                       ' initial states.')

    if self.go_backwards:
      # Reverse time axis.
      inputs = K.reverse(inputs, 1)

    if ops.executing_eagerly_outside_functions():
      if context.num_gpus() > 0:
        outputs, [new_h, new_c], runtime = cudnn_lstm(
            inputs, initial_state[0], initial_state[1], self.kernel,
            self.recurrent_kernel, self.bias, self.units)
      else:
        outputs, [new_h, new_c], runtime = normal_lstm(
            inputs, initial_state[0], initial_state[1], self.kernel,
            self.recurrent_kernel, self.bias, self.units, self.activation,
            self.recurrent_activation)
    else:
      outputs, [new_h, new_c], runtime = normal_lstm(
          inputs, initial_state[0], initial_state[1], self.kernel,
          self.recurrent_kernel, self.bias, self.units, self.activation,
          self.recurrent_activation)

      function.register(cudnn_lstm, inputs, initial_state[0], initial_state[1],
                        self.kernel, self.recurrent_kernel, self.bias,
                        self.units)

    states = [new_h, new_c]

    if self.stateful:
      updates = []
      for i in range(len(states)):
        updates.append(state_ops.assign(self.states[i], states[i]))
      self.add_update(updates, inputs)

    if self.return_sequences:
      output = outputs
    else:
      output = outputs[:, -1, :]

    if self.return_state:
      return [output] + states
    else:
      return output, runtime
Example No. 6
  def __enter__(self):
    # Only run in V2 Function mode.
    if (context.executing_eagerly() or
        not ops.executing_eagerly_outside_functions()):
      return self

    self._graph = ops.get_default_graph()
    self._num_operations = len(self._graph.get_operations())
    return self
Example No. 7
def current():
  """Return a string (not canonicalized) for the current device."""
  # TODO(josh11b): Work out how this function interacts with ops.colocate_with.
  if ops.executing_eagerly_outside_functions():
    d = context.context().device_name
  else:
    op = _FakeOperation()
    ops.get_default_graph()._apply_device_functions(op)  # pylint: disable=protected-access
    d = op.device
  return d
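
A small usage sketch; the device string is a placeholder, and note that the returned value is not canonicalized (see `canonicalize` above):

with ops.device('/device:GPU:0'):
  current_device = current()  # graph mode: from the pushed device functions;
                              # eager mode: context.context().device_name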
Example No. 8
def _create_keras_history_helper(tensors, processed_ops, created_layers):
  """Helper method for `create_keras_history`.

  Arguments:
    tensors: A structure of Tensors for which to create Keras metadata.
    processed_ops: Set. TensorFlow operations that have already been wrapped in
      `TensorFlowOpLayer` instances.
    created_layers: List. The `TensorFlowOpLayer` instances created.

  Returns:
    Tuple. First element is the updated set of TensorFlow Operations that
    have been wrapped in `TensorFlowOpLayer` instances. Second element is
    a list of the `TensorFlowOpLayer` instances created.
  """
  # Import of `base_layer` needed in order to create `TensorFlowOpLayer`.
  # Cannot be imported at top because of circular dependencies.
  # TODO(omalleyt): Resolve circular dependency.
  from tensorflow.python.keras.engine import base_layer  # pylint: disable=g-import-not-at-top
  tensor_list = nest.flatten(tensors)
  for tensor in tensor_list:
    if getattr(tensor, '_keras_history', None) is not None:
      continue
    op = tensor.op  # The Op that created this Tensor.
    if op not in processed_ops:
      # Recursively set `_keras_history`.
      op_inputs = list(op.inputs)
      constants = {}
      layer_inputs = []
      for i, op_input in enumerate(op_inputs):
        if uses_keras_history(op_input):
          layer_inputs.append(op_input)
        else:
          # Treat any value not originating from a `keras.Input` as
          # a constant. Variables cannot be supported.
          if (distribution_strategy_context.in_cross_replica_context() and
              not ops.executing_eagerly_outside_functions()):
            # In Legacy Graph mode, evaluating here causes the Session to be
            # configured improperly.
            constants[i] = op_input
          else:
            constants[i] = backend.function([], op_input)([])
      processed_ops, created_layers = _create_keras_history_helper(
          layer_inputs, processed_ops, created_layers)
      name = op.name
      node_def = op.node_def.SerializeToString()
      op_layer = base_layer.TensorFlowOpLayer(
          node_def, constants=constants, name=name)
      created_layers.append(op_layer)
      op_layer._add_inbound_node(  # pylint: disable=protected-access
          layer_inputs, op.outputs)
      processed_ops.update([op])
  return processed_ops, created_layers
Example No. 9
  def _distributed_apply(self, distribution, grads_and_vars, name):
    """`apply_gradients` using a `DistributionStrategy`."""
    reduced_grads = distribution.extended.batch_reduce_to(
        ds_reduce_util.ReduceOp.SUM, grads_and_vars)
    var_list = [v for _, v in grads_and_vars]
    grads_and_vars = zip(reduced_grads, var_list)

    def apply_grad_to_update_var(var, grad):
      """Apply gradient to variable."""
      if isinstance(var, ops.Tensor):
        raise NotImplementedError("Trying to update a Tensor ", var)
      if isinstance(grad, ops.IndexedSlices):
        if var.constraint is not None:
          raise RuntimeError(
              "Cannot use a constraint function on a sparse variable.")
        return self._resource_apply_sparse_duplicate_indices(
            grad.values, var, grad.indices)
      update_op = self._resource_apply_dense(grad, var)
      if var.constraint is not None:
        with ops.control_dependencies([update_op]):
          return var.assign(var.constraint(var))
      else:
        return update_op

    update_ops = []
    with backend.name_scope(name or self._name):
      for grad, var in grads_and_vars:
        scope_name = ("" if ops.executing_eagerly_outside_functions() else
                      "_" + var.op.name)
        with backend.name_scope("update" + scope_name):
          update_ops.extend(
              distribution.extended.update(
                  var, apply_grad_to_update_var, args=(grad,), group=False))

      any_symbolic = any(isinstance(i, ops.Operation) or
                         tf_utils.is_symbolic_tensor(i) for i in update_ops)
      if not context.executing_eagerly() or any_symbolic:
        # If the current context is graph mode or any of the update ops are
        # symbolic then the step update should be carried out under a graph
        # context. (eager updates execute immediately)
        with ops._get_graph_from_inputs(update_ops).as_default():  # pylint: disable=protected-access
          with ops.control_dependencies(update_ops):
            return self._iterations.assign_add(1).op

      return self._iterations.assign_add(1)
Example No. 10
  def restore_variables(self, wrapped, saver):
    """Restores variables from the checkpoint."""
    if saver is not None:
      saver_def = saver.saver_def
      filename_tensor = wrapped.graph.as_graph_element(
          saver_def.filename_tensor_name)
      # We both feed and fetch filename_tensor so we have an operation to use to
      # feed into variable initializers (only relevant for v1 graph building).
      restore_fn = wrapped.prune(
          feeds=[filename_tensor],
          fetches=[filename_tensor,
                   wrapped.graph.as_graph_element(saver_def.restore_op_name)])
      initializer, _ = restore_fn(constant_op.constant(self._variables_path))
      if not ops.executing_eagerly_outside_functions():
        for variable in wrapped.graph.get_collection_ref(
            ops.GraphKeys.GLOBAL_VARIABLES):
          # pylint: disable=protected-access
          variable._initializer_op = initializer
Example No. 11
  def __del__(self):
    if ops.executing_eagerly_outside_functions():
      return
    if self._sated:
      return
    if self._fatal_error_if_unsated:
      logger = tf_logging.fatal
    else:
      logger = tf_logging.error
    creation_stack = ''.join(
        [line.rstrip() for line in traceback.format_stack(self._stack_frame)])
    logger(
        '==================================\n'
        'Object was never used (type %s):\n%s\nIf you want to mark it as '
        'used call its "mark_used()" method.\nIt was originally created '
        'here:\n%s\n'
        '==================================' %
        (self._type, self._repr, creation_stack))
Example No. 12
  def _distributed_apply(self, distribution, grads_and_vars, name):
    """`apply_gradients` using a `DistributionStrategy`."""
    reduced_grads = distribution.extended.batch_reduce_to(
        ds_reduce_util.ReduceOp.SUM, grads_and_vars)
    var_list = [v for _, v in grads_and_vars]
    grads_and_vars = zip(reduced_grads, var_list)

    def apply_grad_to_update_var(var, grad):
      """Apply gradient to variable."""
      if isinstance(var, ops.Tensor):
        raise NotImplementedError("Trying to update a Tensor ", var)
      if isinstance(grad, ops.IndexedSlices):
        if var.constraint is not None:
          raise RuntimeError(
              "Cannot use a constraint function on a sparse variable.")
        return self._resource_apply_sparse_duplicate_indices(
            grad.values, var, grad.indices)
      update_op = self._resource_apply_dense(grad, var)
      if var.constraint is not None:
        with ops.control_dependencies([update_op]):
          return var.assign(var.constraint(var))
      else:
        return update_op

    update_ops = []
    with ops.name_scope(name, self._name) as name:
      for grad, var in grads_and_vars:
        scope_name = ("" if ops.executing_eagerly_outside_functions() else
                      "_" + var.op.name)
        with ops.name_scope("update" + scope_name):
          update_ops.extend(
              distribution.extended.update(
                  var, apply_grad_to_update_var, args=(grad,), group=False))
      with ops.control_dependencies(update_ops):
        apply_updates = self._iterations.assign_add(1)
      if not context.executing_eagerly():
        apply_updates = apply_updates.op
      return apply_updates
Example No. 13
  def get_slot(self, var, name):
    """Return a slot named `name` created for `var` by the Optimizer.

    Some `Optimizer` subclasses use additional variables.  For example
    `Momentum` and `Adagrad` use variables to accumulate updates.  This method
    gives access to these `Variable` objects if for some reason you need them.

    Use `get_slot_names()` to get the list of slot names created by the
    `Optimizer`.

    Args:
      var: A variable passed to `minimize()` or `apply_gradients()`.
      name: A string.

    Returns:
      The `Variable` for the slot if it was created, `None` otherwise.
    """
    # pylint: disable=protected-access
    named_slots = self._slots.get(name, None)
    if not named_slots:
      return None

    if hasattr(var, "_distributed_container"):
      # NOTE: If this isn't patched, then there is no `handle` in
      # `_resource_apply_dense`.
      distributed_container = var._distributed_container()
      assert distributed_container is not None
      if ops.executing_eagerly_outside_functions():
        key = distributed_container._unique_id
      else:
        key = (distributed_container.graph, distributed_container._shared_name)
      # pylint: enable=protected-access
      mirrored_slot = named_slots.get(key, None)
      if mirrored_slot is None: return None
      return mirrored_slot.get(device=var.device)

    return named_slots.get(_var_key(var), None)
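
Usage sketch based on the docstring; the slot name 'momentum' is an assumption that depends on the concrete `Optimizer` subclass (see `get_slot_names()`):

slot_var = optimizer.get_slot(var, 'momentum')
if slot_var is not None:
  # Accumulator-style slots typically mirror the shape of `var`.
  print(slot_var)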
Example No. 14
def shutdown_tpu_system(cluster_resolver=None):
  """Shuts down the TPU devices.

  This will clear all caches, even those that are maintained through sequential
  calls to tf.tpu.experimental.initialize_tpu_system, such as the compilation
  cache.

  Args:
    cluster_resolver: A tf.distribute.cluster_resolver.TPUClusterResolver,
        which provides information about the TPU cluster.

  Raises:
    RuntimeError: If no TPU devices found for eager execution or if run in a
        tf.function.
  """
  job = None
  if cluster_resolver is None:
    # If no cluster resolver is specified and we are running eagerly, execute
    # the shutdown ops in the current device scope.
    if context.executing_eagerly():
      curr_device = device.DeviceSpec.from_string(context.context().device_name)
      if curr_device.job is not None:
        job = "{}/replica:0/task:0".format(curr_device.job)

    cluster_resolver = TPUClusterResolver("")
  assert isinstance(cluster_resolver, TPUClusterResolver)

  tpu_name = compat.as_text(cluster_resolver._tpu)  # pylint: disable=protected-access
  if tpu_name not in _INITIALIZED_TPU_SYSTEMS:
    logging.warning("You are shutting down a TPU system %s that has not been "
                    "initialized.")

  logging.info("Shutting down the TPU system: %s", tpu_name)

  if context.executing_eagerly():
    # This function is written the way it is for the following non-intuitive
    # reasons:
    # tpu.shutdown_system creates a dummy op whose sole purpose is to trigger
    # DistributedTPURewritePass. This pass actually adds real ops that
    # shutdown the TPU system. Thus, we can't simply run tpu.shutdown_system
    # eagerly. We need to wrap it in defun and trigger the rewrite passes on it.
    if tpu_name not in _LOCAL_MASTERS:
      # Explicitly place tpu.shutdown_system on the first worker to avoid the
      # error where the output node matches multiple devices.
      job = "{}/replica:0/task:0".format(cluster_resolver.get_job_name())

    @function.defun
    def _tpu_shutdown_fn():
      tpu.shutdown_system(job=job)

    # The TPU_SYSTEM device must match the device used in tpu.shutdown_system
    # exactly, otherwise you can get errors if there are multiple TPU_SYSTEM
    # devices available.
    with ops.device(tpu._tpu_system_device_name(job)):  # pylint: disable=protected-access
      _tpu_shutdown_fn()

    # Clear out the eager context caches since the memory is invalid now.
    logging.info("Clearing out eager caches")
    context.context()._clear_caches()  # pylint: disable=protected-access
  elif not ops.executing_eagerly_outside_functions():
    master = cluster_resolver.master()
    cluster_spec = cluster_resolver.cluster_spec()

    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
    if cluster_spec:
      session_config.cluster_def.CopyFrom(cluster_spec.as_cluster_def())

    with ops.Graph().as_default():
      with session_lib.Session(config=session_config, target=master) as sess:
        sess.run(tpu.shutdown_system())
  else:
    raise RuntimeError("initialize_tpu_system is not supported within "
                       "tf.functions.")

  logging.info("Finished shutting down TPU system.")
  if tpu_name in _INITIALIZED_TPU_SYSTEMS:
    del _INITIALIZED_TPU_SYSTEMS[tpu_name]
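
A hedged end-to-end sketch; `tf.tpu.experimental.initialize_tpu_system` and the resolver path come from the docstring, and the empty TPU name mirrors the placeholder used in the code above:

import tensorflow as tf

resolver = tf.distribute.cluster_resolver.TPUClusterResolver("")  # placeholder name
tf.tpu.experimental.initialize_tpu_system(resolver)
# ... run TPU computations ...
shutdown_tpu_system(resolver)  # clears caches, including the compilation cache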
Example No. 15
    def callback(self,
                 op_type,
                 inputs,
                 attrs,
                 outputs,
                 op_name=None,
                 graph=None):
        """Op callback for tracing (dumping) a TF program's execution."""
        del attrs  # Unused

        writer = self.get_writer()
        if graph:
            is_v1_graph_mode = not ops.executing_eagerly_outside_functions()
            context_id = self._get_context_id(graph)  # Innermost context ID.
            output_tensor_ids = self._get_symbolic_tensor_ids(len(outputs))
            if op_type in ("Placeholder", "PlaceholderWithDefault"):
                # In some cases, the op name of a Placeholder op in a graph
                # can be a duplicate (e.g., with the name "resource").
                # When this happens, we give the op a debugger-generated name
                # in order to prevent problems and check failures down the pipeline.
                op_name = "%s_%d" % (op_name, self._symbolic_tensor_counter)
            if is_v1_graph_mode:
                for input_tensor in inputs:
                    # TODO(cais):
                    if input_tensor in self._placeholder_to_debug_tensor and outputs:
                        outputs[0].op._add_control_input(  # pylint: disable=protected-access
                            self._placeholder_to_debug_tensor[input_tensor].op)
            graph_op_creation = debug_event_pb2.GraphOpCreation(
                op_type=op_type,
                op_name=op_name,
                graph_name=graph.name if hasattr(graph, "name") else None,
                graph_id=context_id,
                input_names=[input_tensor.name for input_tensor in inputs],
                num_outputs=len(outputs),
                output_tensor_ids=output_tensor_ids,
                code_location=self._process_stack_frames())
            writer.WriteGraphOpCreation(graph_op_creation)
            if outputs and compat.as_bytes(
                    op_type) not in op_callbacks_common.OP_CALLBACK_SKIP_OPS:
                return self._instrument_symbolic_tensors(
                    outputs, op_type, op_name, context_id, output_tensor_ids)
        else:
            op_type_bytes = compat.as_bytes(op_type)
            if op_type_bytes == b"DebugNumericSummaryV2":
                # TODO(b/140334369): Remove this special-casing logic once
                # op_callback automatically prevents infinite recursion in eager mode.
                return None
            if op_type_bytes in op_callbacks_common.OP_CALLBACK_SKIP_OPS:
                return None
            context_id = self._func_graph_id_from_func_name(op_type)
            input_ids = [t._id for t in inputs]  # pylint:disable=protected-access
            output_tensor_device_ids = [
                writer.RegisterDeviceAndGetId(output.device)
                for output in outputs
            ] if outputs else []
            writer.WriteExecution(
                self._dump_eager_tensors(outputs,
                                         op_type,
                                         input_ids,
                                         output_tensor_device_ids,
                                         graph_id=context_id))
Example No. 16
    def apply_gradients(self, grads_and_vars, name=None):
        """Apply gradients to variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        `compute_gradients()`.
      name: Optional name for the returned operation. Defaults to the name
        passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.

    Raises:
      TypeError: If `grads_and_vars` is malformed.
      ValueError: If none of the variables have gradients.
    """
        grads_and_vars = _filter_grads(grads_and_vars)
        var_list = [v for (_, v) in grads_and_vars]
        if distribute_ctx.has_distribution_strategy():
            reduced_grads = merge_grads(grads_and_vars)
            grads_and_vars = zip(reduced_grads, var_list)

        with ops.init_scope():
            self._prepare()
            self._create_slots(var_list)
        update_ops = []

        def update_grad_to_var(grad, var):
            """Apply gradient to variable."""
            if isinstance(var, ops.Tensor):
                raise NotImplementedError("Trying to update a Tensor ", var)
            if isinstance(grad, ops.IndexedSlices):
                if var.constraint is not None:
                    raise RuntimeError(
                        "Cannot use a constraint function on a sparse variable."
                    )
                return self._resource_apply_sparse_duplicate_indices(
                    grad.values, var, grad.indices)
            update_op = self._resource_apply_dense(grad, var)
            if var.constraint is not None:
                with ops.control_dependencies([update_op]):
                    return var.assign(var.constraint(var))
            else:
                return update_op

        with ops.name_scope(name, self._name) as name:
            for grad, var in grads_and_vars:
                scope_name = ("" if ops.executing_eagerly_outside_functions()
                              else "_" + var.op.name)
                with ops.name_scope("update" + scope_name):
                    update_ops.append(update_grad_to_var(grad, var))
            # Control dependencies do not work in per-replica mode; change this
            # once b/118841692 is fixed.
            # with ops.control_dependencies(update_ops):
            #   apply_updates = self._iterations.assign_add(1).op
            apply_updates = merge_update_step(update_ops, self.iterations)
            return apply_updates
Example No. 17
def _create_keras_history_helper(tensors, processed_ops, created_layers):
    """Helper method for `create_keras_history`.

  Args:
    tensors: A structure of Tensors for which to create Keras metadata.
    processed_ops: Set. TensorFlow operations that have already been wrapped in
      `TensorFlowOpLayer` instances.
    created_layers: List. The `TensorFlowOpLayer` instances created.

  Returns:
    Tuple. First element is the updated set of TensorFlow Operations that
    have been wrapped in `TensorFlowOpLayer` instances. Second element is
    a list of the `TensorFlowOpLayer` instances created.
  """
    # Import of `base_layer` needed in order to create `TensorFlowOpLayer`.
    # Cannot be imported at top because of circular dependencies.
    # TODO(omalleyt): Resolve circular dependency.
    from tensorflow.python.keras.engine import base_layer  # pylint: disable=g-import-not-at-top
    tensor_list = nest.flatten(tensors)
    sparse_ops = []
    ragged_tensors = []
    for tensor in tensor_list:
        if getattr(tensor, '_keras_history', None) is not None:
            continue
        if isinstance(
                tensor,
            (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
            sparse_ops.append(tensor.op)
            continue
        if tf_utils.is_ragged(tensor):
            # Ragged tensors don't have an op property
            ragged_tensors.append(tensor)
            continue
        op = tensor.op  # The Op that created this Tensor.
        if op not in processed_ops:
            # Recursively set `_keras_history`.
            op_inputs = list(op.inputs)
            constants = {}
            layer_inputs = []
            for i, op_input in enumerate(op_inputs):
                if uses_keras_history(op_input):
                    layer_inputs.append(op_input)
                else:
                    # Treat any value not originating from a `keras.Input` as
                    # a constant. Variables cannot be supported.
                    ds_with_session = (
                        distribution_strategy_context.in_cross_replica_context(
                        ) and not ops.executing_eagerly_outside_functions())
                    using_xla = control_flow_util.GraphOrParentsInXlaContext(
                        ops.get_default_graph())
                    if ds_with_session or using_xla or _UNSAFE_GRAPH_OP_LAYER_CREATION:
                        # In Legacy Graph mode, evaluating here causes the Session to be
                        # configured improperly. The downside of this is that saving
                        # via `get_config` breaks, but SavedModel still works.
                        constants[i] = op_input
                    else:
                        with ops.init_scope():
                            if ops.executing_eagerly_outside_functions():
                                constants[
                                    i] = backend.eval_in_eager_or_function(
                                        op_input)
                            else:
                                constants[i] = backend.function([],
                                                                op_input)([])
            layer_inputs = unnest_if_single_tensor(layer_inputs)
            processed_ops, created_layers = _create_keras_history_helper(
                layer_inputs, processed_ops, created_layers)
            name = op.name
            node_def = op.node_def.SerializeToString()
            op_layer = base_layer.TensorFlowOpLayer(node_def,
                                                    constants=constants,
                                                    name=name)
            created_layers.append(op_layer)
            op_layer._set_connectivity_metadata(  # pylint: disable=protected-access
                args=(layer_inputs, ),
                kwargs={},
                outputs=op.outputs)
            processed_ops.update([op])
    if sparse_ops or ragged_tensors:
        lambda_example = """
    weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights)
    output = tf.keras.layers.Lambda(weights_mult)(input)
    """
        raise ValueError(
            'Tensorflow ops that generate ragged or sparse tensor '
            'outputs are currently not supported by Keras automatic '
            'op wrapping. Please wrap these ops in a Lambda layer: '
            '\n\n```\n{example}\n```\n'
            'Sparse ops encountered: {sparse_ops}\n'
            'Ragged tensors encountered: {ragged_tensors}\n'.format(
                example=lambda_example,
                sparse_ops=str(sparse_ops),
                ragged_tensors=str(ragged_tensors)))
    return processed_ops, created_layers
Example No. 18
def _create_keras_history_helper(tensors, processed_ops, created_layers):
    """Helper method for `create_keras_history`.

  Arguments:
    tensors: A structure of Tensors for which to create Keras metadata.
    processed_ops: Set. TensorFlow operations that have already been wrapped in
      `TensorFlowOpLayer` instances.
    created_layers: List. The `TensorFlowOpLayer` instances created.

  Returns:
    Tuple. First element is the updated set of TensorFlow Operations that
    have been wrapped in `TensorFlowOpLayer` instances. Second element is
    a list of the `TensorFlowOpLayer` instances created.
  """
    # Import of `base_layer` needed in order to create `TensorFlowOpLayer`.
    # Cannot be imported at top because of circular dependencies.
    # TODO(omalleyt): Resolve circular dependency.
    from tensorflow.python.keras.engine import base_layer  # pylint: disable=g-import-not-at-top
    tensor_list = nest.flatten(tensors)
    for tensor in tensor_list:
        if getattr(tensor, '_keras_history', None) is not None:
            continue
        op = tensor.op  # The Op that created this Tensor.
        if op not in processed_ops:
            if op.type.startswith('Sparse'):
                lambda_example = """
        weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights)
        output = tf.keras.layers.Lambda(weights_mult)(input)
        """
                raise ValueError(
                    'Sparse ops are not supported with functional models with built-in '
                    'layer wrapping. Please wrap the sparse ops in a Lambda layer like'
                    ': \n{lambda_example}\n'.format(
                        lambda_example=lambda_example))

            # Recursively set `_keras_history`.
            op_inputs = list(op.inputs)
            constants = {}
            layer_inputs = []
            for i, op_input in enumerate(op_inputs):
                if uses_keras_history(op_input):
                    layer_inputs.append(op_input)
                else:
                    # Treat any value not originating from a `keras.Input` as
                    # a constant. Variables cannot be supported.
                    if (distribution_strategy_context.in_cross_replica_context(
                    ) and not ops.executing_eagerly_outside_functions()):
                        # In Legacy Graph mode, evaluating here causes the Session to be
                        # configured improperly.
                        constants[i] = op_input
                    else:
                        with ops.init_scope():
                            constants[i] = backend.function([], op_input)([])
            processed_ops, created_layers = _create_keras_history_helper(
                layer_inputs, processed_ops, created_layers)
            name = op.name
            node_def = op.node_def.SerializeToString()
            op_layer = base_layer.TensorFlowOpLayer(node_def,
                                                    constants=constants,
                                                    name=name)
            created_layers.append(op_layer)
            op_layer._add_inbound_node(  # pylint: disable=protected-access
                layer_inputs, op.outputs)
            processed_ops.update([op])
    return processed_ops, created_layers
Example No. 19
def is_in_tf_function():
  """Returns if inside of a tf.function."""
  return (ops.executing_eagerly_outside_functions() and
          not context.executing_eagerly() and not is_in_keras_graph())
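
A usage sketch; whether it returns True also depends on the surrounding Keras graph state, so the expected values below are assumptions for the plain `tf.function` case:

import tensorflow as tf

@tf.function
def traced_fn():
  return is_in_tf_function()  # expected True while tracing a plain tf.function

is_in_tf_function()  # False when called from ordinary eager code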
Example No. 20
def check_graph_consistency(tensor=None, method='add_loss', force_raise=False):
    """Checks that tensors passed to `add_*` method match the Keras graph.

  When one of the `add_*` method is called inside a V2 conditional branch,
  the underlying tensor gets created in a FuncGraph managed by control_flow_v2.
  We need to raise clear error messages in such cases.

  Arguments:
    tensor: Tensor to check, or `False` if it is known that an error
      should be raised.
    method: Caller method, one of {'add_metric', 'add_loss', 'add_update'}.
    force_raise: If an error should be raised regardless of `tensor`.

  Raises:
    RuntimeError: In case of an out-of-graph tensor.
  """
    if (force_raise
            or (ops.executing_eagerly_outside_functions()
                and hasattr(tensor, 'graph')
                and isinstance(tensor.graph,
                               (control_flow_util_v2.CondBranchFuncGraph,
                                control_flow_util_v2.WhileCondFuncGraph,
                                control_flow_util_v2.WhileBodyFuncGraph)))):
        if method == 'activity_regularizer':
            bad_example = """
      class TestModel(tf.keras.Model):

        def __init__(self):
          super(TestModel, self).__init__(name='test_model')
          self.dense = tf.keras.layers.Dense(2, activity_regularizer='l2')

        def call(self, x, training=None):
          if training:
            return self.dense(x)
          else:
            return self.dense(x)
      """
            correct_example = """
      class TestModel(tf.keras.Model):

        def __init__(self):
          super(TestModel, self).__init__(name='test_model')
          self.dense = tf.keras.layers.Dense(2, activity_regularizer='l2')

        def call(self, x, training=None):
          return self.dense(x)
      """
            raise RuntimeError(
                'You are using a layer with `activity_regularizer` in a control flow '
                'branch, e.g.:\n{bad_example}\nThis is currently not supported. '
                'Please move your call to the layer with `activity_regularizer` out '
                'of the control flow branch, e.g.:\n{correct_example}\n'
                'You can also resolve this by marking your outer model/layer dynamic'
                ' (eager-only) by passing `dynamic=True` to the layer constructor. '
                'Any kind of control flow is supported with dynamic layers. '
                'Note that using `dynamic=True` requires you to implement static '
                'shape inference in the `compute_output_shape(input_shape)` '
                'method.'.format(bad_example=bad_example,
                                 correct_example=correct_example))

        if method == 'add_metric':
            bad_example = """
      def call(self, inputs, training=None):
        if training:
          metric = compute_metric(inputs)
          self.add_metric(metric, name='my_metric', aggregation='mean')
        return inputs
      """
            correct_example = """
      def call(self, inputs, training=None):
        if training:
          metric = compute_metric(inputs)
        else:
          metric = 0.
        self.add_metric(metric, name='my_metric', aggregation='mean')
        return inputs
      """
        elif method == 'add_loss':
            bad_example = """
      def call(self, inputs, training=None):
        if training:
          loss = compute_loss(inputs)
          self.add_loss(loss)
        return inputs
      """
            correct_example = """
      def call(self, inputs, training=None):
        if training:
          loss = compute_loss(inputs)
        else:
          loss = 0.
        self.add_loss(loss)
        return inputs
      """
        else:
            bad_example = """
      def call(self, inputs, training=None):
        if training:
          self.add_update(self.w.assign_add(1))
        return inputs
      """
            correct_example = """
      def call(self, inputs, training=None):
        if training:
          increment = 1
        else:
          increment = 0
        self.add_update(self.w.assign_add(increment))
        return inputs
      """
        raise RuntimeError(
            'You are using the method `{method}` in a control flow branch '
            'in your layer, e.g.:\n{bad_example}\n'
            'This is not currently supported. '
            'Please move your call to {method} out of the control flow branch, '
            'e.g.:\n{correct_example}\n'
            'You can also resolve this by marking your layer '
            'as dynamic (eager-only) by passing '
            '`dynamic=True` to the layer constructor. '
            'Any kind of control flow is supported with dynamic layers. '
            'Note that using `dynamic=True` requires you '
            'to implement static shape inference '
            'in the `compute_output_shape(input_shape)` method.'.format(
                method=method,
                bad_example=bad_example,
                correct_example=correct_example))
Example No. 21
    def _initialize_multi_worker(self, cluster_resolver):
        """Initializes the object for multi-worker training."""
        cluster_spec = multi_worker_util.normalize_cluster_spec(
            cluster_resolver.cluster_spec())
        task_type = cluster_resolver.task_type
        task_id = cluster_resolver.task_id
        if task_type is None or task_id is None:
            raise ValueError(
                "When `cluster_spec` is given, you must also specify "
                "`task_type` and `task_id`.")
        self._cluster_spec = cluster_spec
        self._task_type = task_type
        self._task_id = task_id
        self._id_in_cluster = multi_worker_util.id_in_cluster(
            self._cluster_spec, self._task_type, self._task_id)

        self._num_workers = multi_worker_util.worker_count(
            cluster_spec, task_type)
        if not self._num_workers:
            raise ValueError(
                "No `worker`, `chief` or `evaluator` tasks can be found "
                "in `cluster_spec`.")

        self._is_chief = multi_worker_util.is_chief(cluster_spec, task_type,
                                                    task_id)

        self._worker_device = "/job:%s/task:%d" % (task_type, task_id)
        self._host_input_device = numpy_dataset.SingleDevice(
            self._worker_device)

        if (ops.executing_eagerly_outside_functions() and
                not getattr(self, "_local_or_standalone_client_mode", False)):
            context.context().configure_collective_ops(
                collective_leader=multi_worker_util.collective_leader(
                    cluster_spec, task_type, task_id),
                scoped_allocator_enabled_ops=("CollectiveReduce", ),
                device_filters=("/job:%s/task:%d" % (task_type, task_id), ))
            self._collective_ops_configured = True

        # Starting a std server in eager mode and in independent worker mode.
        if (context.executing_eagerly()
                and not getattr(self, "_std_server_started", False) and
                not getattr(self, "_local_or_standalone_client_mode", False)):
            # Checking _local_or_standalone_client_mode as well because we should not
            # create the std server in standalone client mode.
            config_proto = copy.deepcopy(context.context().config)
            config_proto = self._update_config_proto(config_proto)

            if hasattr(cluster_resolver, "port"):
                port = cluster_resolver.port
            else:
                port = 0
            server_def = tensorflow_server_pb2.ServerDef(
                cluster=cluster_spec.as_cluster_def(),
                default_session_config=config_proto,
                job_name=task_type,
                task_index=task_id,
                protocol=cluster_resolver.rpc_layer or "grpc",
                port=port)
            context.context().enable_collective_ops(server_def)
            self._std_server_started = True
            # The `ensure_initialized` is needed before calling
            # `context.context().devices()`.
            context.context().ensure_initialized()
            logging.info(
                "Enabled multi-worker collective ops with available devices: %r",
                context.context().devices())

        # TODO(yuefengz): The `num_gpus` is only for this particular task. It
        # assumes all workers have the same number of GPUs. We should remove this
        # assumption by querying all tasks for their numbers of GPUs.
        # TODO(b/126786766): TFConfigClusterResolver returns wrong number of GPUs in
        # some cases.
        if isinstance(cluster_resolver, TFConfigClusterResolver):
            num_gpus = context.num_gpus()
        else:
            num_gpus = cluster_resolver.num_accelerators().get("GPU", 0)

        if num_gpus:
            local_devices = tuple("%s/device:GPU:%d" % (self._worker_device, i)
                                  for i in range(num_gpus))
        else:
            local_devices = (self._worker_device, )

        self._collective_keys = cross_device_utils.CollectiveKeys()
        self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
            devices=local_devices,
            group_size=len(local_devices) * self._num_workers,
            collective_keys=self._collective_keys,
            communication=self._communication)
        # CrossDeviceOps for per host tensors.
        self._host_cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
            devices=[self._worker_device],
            group_size=self._num_workers,
            collective_keys=self._collective_keys,
            communication=cross_device_ops_lib.CollectiveCommunication.RING,
        )
        super(CollectiveAllReduceExtended,
              self)._initialize_single_worker(local_devices)

        # Add a default device so that ops without specified devices will not end up
        # on other workers.
        self._default_device = "/job:%s/task:%d" % (task_type, task_id)

        # Save the num_gpus_per_worker and rpc_layer for configure method.
        self._num_gpus_per_worker = num_gpus
        self._rpc_layer = cluster_resolver.rpc_layer
        self._warn_nccl_no_gpu()

        # TODO(b/151232436): Enable check health thread by default.
        if self._enable_check_health:
            self._start_check_health_thread()

        logging.info(
            "MultiWorkerMirroredStrategy with cluster_spec = %r, task_type = %r, "
            "task_id = %r, num_workers = %r, local_devices = %r, "
            "communication = %s", cluster_spec.as_dict(), task_type, task_id,
            self._num_workers, local_devices, self._communication)
Example No. 22
    def get_job_name(self):
        if ops.executing_eagerly_outside_functions() or self._should_resolve(
        ) or is_running_in_gce():
            return self.task_type
Example No. 23
    def _instrument_symbolic_tensors(self, tensors, op_type, op_name,
                                     tfdbg_context_id, tensor_ids):
        """Add debugging instrumentation for symbolic (i.e., non-eager) tensors.

    The detailed fashion in which the tensors are instrumented is determined
    by the tensor_debug_mode configured for the currently enabled dumping
    callback.

    Args:
      tensors: A tuple of Tensors to instrument. It is assumed that their
        ordering corresponds to the ordering of output tensors of an original
        op. Output slot indices (0-based) will be generated based on the
        ordering.
      op_type: Type name of the op that emits the Tensors (e.g., "MatMul").
      op_name: Name of the op that emits the Tensors (e.g., "dense_1/MatMul").
      tfdbg_context_id: A unique ID for the context that the op belongs to
        (e.g., a graph).
      tensor_ids: A list of unique ID numbers for the tensors, for tfdbg's
        internal use.

    Returns:
      Non-eager Tensors that override the `tensors` as the output of the op
      that originally generated `tensors`. In some cases (e.g., non-V1 graph
      mode), this may be `None`, as the instrumentation can simply rely on
      automatic control dependencies (see `auto_control_deps.py`) instead of
      tensor overriding.
    """
        tensor_debug_mode = self._tensor_debug_mode
        debug_urls = ["file://%s" % self._dump_root]
        is_v1_graph_mode = not ops.executing_eagerly_outside_functions()
        instrumented_tensors = [] if is_v1_graph_mode else None
        if tensor_debug_mode == debug_event_pb2.TensorDebugMode.NO_TENSOR:
            for output_slot, tensor in enumerate(tensors):
                if (not self._should_dump_tensor(op_type, tensor.dtype)
                        or not tensor.dtype.is_numpy_compatible):
                    if is_v1_graph_mode:
                        instrumented_tensors.append(tensor)
                    continue
                if is_v1_graph_mode and not tensor.dtype.is_numpy_compatible:
                    # Avoid instrumenting Placeholder under is_v1_graph_mode. Doing that
                    # would cause a runtime complaint about Placeholders not being fed.
                    instrumented_tensors.append(tensor)
                    continue
                # Except in V1 graph mode + control flow, debug_identity_v2 triggers
                # auto control dependency because it's a stateful op.
                debug_tensor = gen_debug_ops.debug_identity_v2(
                    # Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode
                    # as a low-overhead placeholder, since no actual tensor value is
                    # traced.
                    constant_op.constant([], dtype=dtypes.float32),
                    tfdbg_context_id=tfdbg_context_id,
                    op_name=op_name,
                    output_slot=output_slot,
                    tensor_debug_mode=self._tensor_debug_mode,
                    debug_urls=debug_urls)
                if is_v1_graph_mode:
                    instrumented_tensors.append(
                        self._process_v1_graph_mode_tensor(
                            op_type, tensor, debug_tensor, tensor_debug_mode))
            return instrumented_tensors
        elif tensor_debug_mode in (
                debug_event_pb2.TensorDebugMode.CURT_HEALTH,
                debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
                debug_event_pb2.TensorDebugMode.FULL_HEALTH,
                debug_event_pb2.TensorDebugMode.SHAPE):
            for output_slot, tensor in enumerate(tensors):
                dtype = tensor.dtype
                dtype_is_dumpable = (
                    tensor_debug_mode
                    in (debug_event_pb2.TensorDebugMode.CURT_HEALTH,
                        debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
                        debug_event_pb2.TensorDebugMode.FULL_HEALTH)
                    and dtype.is_floating or tensor_debug_mode
                    == debug_event_pb2.TensorDebugMode.SHAPE and
                    (dtype.is_floating or dtype.is_integer or dtype.is_bool))
                if (not self._should_dump_tensor(op_type, tensor.dtype)
                        or not dtype_is_dumpable):
                    if is_v1_graph_mode:
                        instrumented_tensors.append(tensor)
                    continue
                debug_tensor = gen_debug_ops.debug_identity_v2(
                    gen_debug_ops.debug_numeric_summary_v2(
                        tensor,
                        tensor_id=tensor_ids[output_slot],
                        tensor_debug_mode=self._tensor_debug_mode,
                        output_dtype=dtypes.float64),
                    tfdbg_context_id=tfdbg_context_id,
                    op_name=op_name,
                    output_slot=output_slot,
                    tensor_debug_mode=self._tensor_debug_mode,
                    debug_urls=debug_urls)
                if is_v1_graph_mode:
                    instrumented_tensors.append(
                        self._process_v1_graph_mode_tensor(
                            op_type, tensor, debug_tensor, tensor_debug_mode))
            return instrumented_tensors
        elif tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR:
            for output_slot, tensor in enumerate(tensors):
                if (not self._should_dump_tensor(op_type, tensor.dtype)
                        or not tensor.dtype.is_numpy_compatible):
                    # Instrumenting DT_VARIANT and DT_RESOURCE type tensors under
                    # V1 graph mode is known to have issues. TODO(cais): Investigate.
                    if is_v1_graph_mode:
                        instrumented_tensors.append(tensor)
                    continue
                debug_tensor = gen_debug_ops.debug_identity_v2(
                    tensor,
                    tfdbg_context_id=tfdbg_context_id,
                    op_name=op_name,
                    output_slot=output_slot,
                    tensor_debug_mode=self._tensor_debug_mode,
                    debug_urls=debug_urls)
                if is_v1_graph_mode:
                    instrumented_tensors.append(
                        self._process_v1_graph_mode_tensor(
                            op_type, tensor, debug_tensor, tensor_debug_mode))
            return instrumented_tensors
        else:
            raise NotImplementedError(
                "Symbolic tensor instrumentation is not implemented for debug mode "
                "%s" % self._tensor_debug_mode)
Example No. 24
def clone_and_build_model(
    model, input_tensors=None, target_tensors=None, custom_objects=None,
    compile_clone=True, in_place_reset=False, optimizer_iterations=None,
    optimizer_config=None):
  """Clone a `Model` and build/compile it with the same settings used before.

  This function can be run in the same graph or in a separate graph from the
  model. When using a separate graph, `in_place_reset` must be `False`.

  Note that, currently, the clone produced from this function may not work with
  TPU DistributionStrategy. Try at your own risk.

  Args:
    model: `tf.keras.Model` object. Can be Functional, Sequential, or
      sub-classed.
    input_tensors: Optional list or dictionary of input tensors to build the
      model upon. If not provided, placeholders will be created.
    target_tensors: Optional list of target tensors for compiling the model. If
      not provided, placeholders will be created.
    custom_objects: Optional dictionary mapping string names to custom classes
      or functions.
    compile_clone: Boolean, whether to compile model clone (default `True`).
    in_place_reset: Boolean, whether to reset the model in place. Only used if
      the model is a subclassed model. In the case of a subclassed model,
      this argument must be set to `True` (default `False`). To restore the
      original model, use the function
      `in_place_subclassed_model_state_restoration(model)`.
    optimizer_iterations: An iterations variable that will be incremented by the
      optimizer if the clone is compiled. This argument is used when a Keras
      model is cloned into an Estimator model function, because Estimators
      create their own global step variable.
    optimizer_config: Optimizer config dictionary or list of dictionary
      returned from `get_config()`. This argument should be defined if
      `clone_and_build_model` is called in a different graph or session from
      the original model, and the optimizer is an instance of `OptimizerV2`.

  Returns:
    Clone of the model.

  Raises:
    ValueError: Cloning fails in the following cases
      - cloning a subclassed model with `in_place_reset` set to False.
      - compiling the clone when the original model has not been compiled.
  """
  # Grab optimizer now, as we reset-in-place for subclassed models, but
  # want to maintain access to the original optimizer.
  orig_optimizer = model.optimizer
  if compile_clone and not orig_optimizer:
    raise ValueError(
        'Error when cloning model: compile_clone was set to True, but the '
        'original model has not been compiled.')

  if compile_clone:
    compile_args = model._get_compile_args()  # pylint: disable=protected-access
    # Allows this method to be robust to switching graph and eager classes.
    model._get_compile_args = lambda: compile_args

  with CustomObjectScope(custom_objects or {}):
    if model._is_graph_network:
      clone = clone_model(model, input_tensors=input_tensors)
    elif isinstance(model, Sequential):
      clone = clone_model(model, input_tensors=input_tensors)
      if (not clone._is_graph_network and model._build_input_shape is not None):
        if ops.executing_eagerly_outside_functions():
          clone.build(model._build_input_shape)
        else:
          clone._set_inputs(
              K.placeholder(
                  model._build_input_shape, dtype=model.inputs[0].dtype))
    else:
      try:
        # Prefer cloning the model if serial/deserial logic is implemented for
        # subclassed model.
        clone = model.__class__.from_config(model.get_config())
      except NotImplementedError:
        logging.warning('This model is a subclassed model. Please implement '
                        '`get_config` and `from_config` to better support '
                        'cloning the model.')
        if not in_place_reset:
          raise ValueError(
              'This model is a subclassed model. '
              'Such a model cannot be cloned, but there is a workaround where '
              'the model is reset in-place. To use this, please set the '
              'argument `in_place_reset` to `True`. This will reset the '
              'attributes in the original model. To restore the attributes, '
              'call `in_place_subclassed_model_state_restoration(model)`.')
        clone = model
        _in_place_subclassed_model_reset(clone)
      if input_tensors is not None:
        if isinstance(input_tensors, (list, tuple)) and len(input_tensors) == 1:
          input_tensors = input_tensors[0]
        clone._set_inputs(input_tensors)

  if compile_clone:
    if isinstance(orig_optimizer, optimizer_v1.TFOptimizer):
      optimizer = optimizer_v1.TFOptimizer(
          orig_optimizer.optimizer, optimizer_iterations)
      K.track_tf_optimizer(optimizer)
    else:
      if not isinstance(orig_optimizer, (tuple, list)):
        orig_optimizer = [orig_optimizer]
      if optimizer_config is None:
        optimizer = [
            opt.__class__.from_config(opt.get_config())
            for opt in orig_optimizer
        ]
      elif isinstance(optimizer_config, dict):
        optimizer = [orig_optimizer[0].__class__.from_config(optimizer_config)]
      else:
        # optimizer_config is a list of dicts, in the same order as orig_optimizer.
        optimizer = [
            opt.__class__.from_config(opt_config)
            for (opt, opt_config) in zip(orig_optimizer, optimizer_config)
        ]
      if optimizer_iterations is not None:
        for opt in optimizer:
          opt.iterations = optimizer_iterations

      if len(optimizer) == 1:
        optimizer = optimizer[0]

    compile_args['optimizer'] = optimizer
    if target_tensors is not None:
      compile_args['target_tensors'] = target_tensors
    # Ensure Metric objects in new model are separate from existing model.
    compile_args['metrics'] = metrics_module.clone_metrics(
        compile_args['metrics'])
    compile_args['weighted_metrics'] = metrics_module.clone_metrics(
        compile_args['weighted_metrics'])
    clone.compile(**compile_args)

  return clone
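
A minimal usage sketch of the utility above, assuming the private import path
`tensorflow.python.keras.models` and a toy compiled model (both are
illustrative, not part of the original example):

```python
# Hedged sketch: clone a small compiled Sequential model.
import tensorflow as tf
from tensorflow.python.keras import models as keras_models  # assumed path

model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(8,))])
model.compile(optimizer='sgd', loss='mse')

# compile_clone=True rebuilds the optimizer and metrics, so no state is shared
# between the original model and the clone.
clone = keras_models.clone_and_build_model(model, compile_clone=True)
```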
Exemplo n.º 25
def _in_place_subclassed_model_reset(model):
  """Substitute for model cloning that works for subclassed models.

  Subclassed models cannot be cloned because their topology is not serializable.
  To "instantiate" an identical model in a new TF graph, we reuse the original
  model object, but we clear its state.

  After calling this function on a model instance, you can use the model
  instance as if it were a model clone (in particular you can use it in a new
  graph).

  This method clears the state of the input model. It is thus destructive.
  However the original state can be restored fully by calling
  `_in_place_subclassed_model_state_restoration`.

  Args:
    model: Instance of a Keras model created via subclassing.

  Raises:
    ValueError: In case the model uses a subclassed model as inner layer.
  """
  assert not model._is_graph_network  # Only makes sense for subclassed networks
  # Select correct base class for new Model.
  version_utils.swap_class(model.__class__, training.Model, training_v1.Model,
                           ops.executing_eagerly_outside_functions())
  # Retrieve all layers tracked by the model as well as their attribute names
  attributes_cache = {}
  for name in dir(model):
    # Skip attrs that track other trackables.
    if name == 'submodules' or name == '_self_tracked_trackables':
      continue

    try:
      value = getattr(model, name)
    except (AttributeError, ValueError, TypeError):
      continue
    if isinstance(value, Layer):
      attributes_cache[name] = value
      assert value in model.layers
      if hasattr(value, 'layers') and value.layers:
        raise ValueError('We do not support the use of nested layers '
                         'in `model_to_estimator` at this time. Found nested '
                         'layer: %s' % value)
    elif isinstance(
        value, (list, tuple)) and name not in ('layers', '_layers', 'metrics',
                                               '_compile_metric_functions',
                                               '_output_loss_metrics'):
      # Handle case: list/tuple of layers (also tracked by the Network API).
      if value and all(isinstance(val, Layer) for val in value):
        raise ValueError('We do not support the use of list-of-layers '
                         'attributes in subclassed models used with '
                         '`model_to_estimator` at this time. Found list '
                         'model: %s' % name)

  # Replace layers on the model with fresh layers
  layers_to_names = {value: key for key, value in attributes_cache.items()}
  original_layers = list(
      model._flatten_layers(include_self=False, recursive=False))
  setattr_tracking = model._setattr_tracking
  model._setattr_tracking = False
  model._self_tracked_trackables = []
  for layer in original_layers:  # We preserve layer order.
    config = layer.get_config()
    # This will not work for nested subclassed models used as layers.
    # This would be theoretically possible to support, but would add complexity.
    # Only do it if users complain.
    if isinstance(layer, training.Model) and not layer._is_graph_network:
      raise ValueError('We do not support the use of nested subclassed models '
                       'in `model_to_estimator` at this time. Found nested '
                       'model: %s' % layer)
    fresh_layer = layer.__class__.from_config(config)
    name = layers_to_names[layer]
    setattr(model, name, fresh_layer)
    model._self_tracked_trackables.append(fresh_layer)

  # Cache original model build attributes (in addition to layers)
  if (not hasattr(model, '_original_attributes_cache') or
      model._original_attributes_cache is None):
    if model.built:
      attributes_to_cache = [
          'inputs',
          'outputs',
          'total_loss',
          'optimizer',
          'train_function',
          'test_function',
          'predict_function',
          '_training_endpoints',
          '_collected_trainable_weights',
          '_feed_inputs',
          '_feed_input_names',
          '_feed_input_shapes',
      ]
      for name in attributes_to_cache:
        attributes_cache[name] = getattr(model, name)
  model._original_attributes_cache = attributes_cache
  _reset_build_compile_trackers(model)
  model._setattr_tracking = setattr_tracking
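
The reset/restore round trip described in the docstring, as a hedged sketch;
the private helpers are assumed to be importable from
`tensorflow.python.keras.models`, and the toy subclassed model is illustrative:

```python
# Hedged sketch: destructive in-place reset, then restoration, of a
# subclassed model. These are private Keras helpers; the import path is assumed.
import tensorflow as tf
from tensorflow.python.keras import models as keras_models

class TinyModel(tf.keras.Model):

  def __init__(self):
    super(TinyModel, self).__init__()
    self.dense = tf.keras.layers.Dense(2)

  def call(self, inputs):
    return self.dense(inputs)

m = TinyModel()
keras_models._in_place_subclassed_model_reset(m)  # swaps in fresh layers
# ... `m` can now be reused as a "clone" in a new graph ...
keras_models.in_place_subclassed_model_state_restoration(m)  # restores originals
```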
Exemplo n.º 26
    def cluster_spec(self):
        """Returns a ClusterSpec object based on the latest TPU information.

    We retrieve the information from the GCE APIs every time this method is
    called.

    Returns:
      A ClusterSpec containing host information returned from Cloud TPUs,
      or None.

    Raises:
      RuntimeError: If the provided TPU is not healthy.
    """
        ############################################################################
        # There are 5 potential cases this code must handle:
        #  1. [Normal case.] We should resolve the TPU name to a set of tasks, and
        #      a. Create a ClusterSpec that includes the coordinator job
        #      b. Create a ClusterSpec without the coordinator job.
        #  2. [GKE / No API Access.] We should not resolve the TPU name to a set of
        #     tasks and
        #      a. Create a ClusterSpec with the coordinator
        #      b. Create a ClusterSpec without the coordinator
        #  3. [Other (legacy non-gRPC).] We should return None.
        ############################################################################

        if self._should_resolve():
            # Case 1.
            response = self._fetch_cloud_tpu_metadata()  # pylint: disable=protected-access

            if 'state' in response and response['state'] != 'READY':
                raise RuntimeError(
                    'TPU "%s" is not yet ready; state: "%s"' %
                    (compat.as_text(self._tpu), response['state']))

            if 'networkEndpoints' in response:
                worker_list = [
                    '%s:%s' % (endpoint['ipAddress'], endpoint['port'])
                    for endpoint in response['networkEndpoints']
                ]
            else:
                # Fall back to the deprecated response format
                instance_url = '%s:%s' % (response['ipAddress'],
                                          response['port'])
                worker_list = [instance_url]

            cluster_spec = {self.task_type: worker_list}
        else:
            is_eager = ops.executing_eagerly_outside_functions()
            if self.rpc_layer is None and not is_eager:
                # Case 3.
                return None
            # Case 2.
            tpus = []
            for tpu in compat.as_text(self._tpu).split(_ENDPOINTS_SEPARATOR):
                # We are working around the fact that the GKE environment
                # variable supplied to us has the protocol string embedded in
                # it, but we want to strip it out for the ClusterSpec.
                if (self.rpc_layer is not None
                        and tpu.startswith(self.rpc_layer + '://')):
                    tpus.append(tpu[len(self.rpc_layer + '://'):])
                else:
                    tpus.append(tpu)
            cluster_spec = {self.task_type: tpus}

        if self._coordinator_address:
            # {1, 2}.a
            cluster_spec[self._coordinator_name] = [self._coordinator_address]

        return server_lib.ClusterSpec(cluster_spec)
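
A hedged sketch of reaching this resolution through the public API; the TPU
name is a placeholder and only resolves against a real Cloud TPU / GKE setup:

```python
# Hedged sketch: the public resolver drives the case analysis above.
import tensorflow as tf

resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='my-tpu')  # placeholder name
cluster_spec = resolver.cluster_spec()  # may be None in the legacy non-gRPC case
if cluster_spec is not None:
  print(cluster_spec.as_dict())
```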
Exemplo n.º 27
 def f():
     # Minimize both the AutoCastVariable and the normal tf.Variable. Both
     # variables should be updated to the same value.
     op = opt.minimize(lambda: x + y, var_list=[x, y])
     return None if ops.executing_eagerly_outside_functions() else op
Exemplo n.º 28
 def _check_model_class(self, model_class):
     if ops.executing_eagerly_outside_functions():
         self.assertEqual(model_class, training.Model)
     else:
         self.assertEqual(model_class, training_v1.Model)
Exemplo n.º 29
    def _initialize_local(self, cluster_resolver, devices=None):
        """Initializes the object for local training."""
        self._is_chief = True
        self._num_workers = 1

        if ops.executing_eagerly_outside_functions():
            try:
                context.context().configure_collective_ops(
                    scoped_allocator_enabled_ops=("CollectiveReduce", ))
            except RuntimeError:
                logging.warning(
                    "Collective ops is not configured at program startup. "
                    "Some performance features may not be enabled.")
            self._collective_ops_configured = True

        # TODO(b/126786766): TFConfigClusterResolver returns wrong number of GPUs in
        # some cases.
        if isinstance(cluster_resolver, TFConfigClusterResolver):
            num_gpus = context.num_gpus()
        else:
            num_gpus = cluster_resolver.num_accelerators().get("GPU", 0)

        if devices:
            local_devices = devices
        else:
            if num_gpus:
                local_devices = tuple("/device:GPU:%d" % i
                                      for i in range(num_gpus))
            else:
                local_devices = ("/device:CPU:0", )

        self._worker_device = device_util.canonicalize("/device:CPU:0")
        self._host_input_device = numpy_dataset.SingleDevice(
            self._worker_device)

        self._collective_keys = cross_device_utils.CollectiveKeys()
        self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
            devices=local_devices,
            group_size=len(local_devices),
            collective_keys=self._collective_keys,
            communication=self._communication)
        # CrossDeviceOps for per host tensors.
        self._host_cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
            devices=[self._worker_device],
            group_size=self._num_workers,
            collective_keys=self._collective_keys,
            communication=cross_device_ops_lib.CollectiveCommunication.RING,
        )
        super(CollectiveAllReduceExtended,
              self)._initialize_single_worker(local_devices)

        self._cluster_spec = None
        self._task_type = None
        self._task_id = None
        self._id_in_cluster = 0

        # This is a marker that tells whether we are running with a standalone
        # client or an independent worker. Right now with a standalone client,
        # the strategy object is created as a local strategy and then turned
        # into a multi-worker strategy via the `configure` call.
        self._local_or_standalone_client_mode = True

        # Save the num_gpus_per_worker and rpc_layer for configure method.
        self._num_gpus_per_worker = num_gpus
        self._rpc_layer = cluster_resolver.rpc_layer
        self._warn_nccl_no_gpu()

        logging.info(
            "Single-worker MultiWorkerMirroredStrategy with local_devices "
            "= %r, communication = %s", local_devices, self._communication)
Exemplo n.º 30
 def __new__(cls, *args, **kwargs):  # pylint: disable=unused-argument
     eager_enabled = ops.executing_eagerly_outside_functions()
     cls = swap_class(cls, training.Model, training_v1.Model, eager_enabled)
     return super(ModelVersionSelector, cls).__new__(cls)
Exemplo n.º 31
 def callback(self,
              op_type,
              inputs,
              attrs,
              outputs,
              op_name=None,
              graph=None):
     """Eager-function unified callback for checking numerics."""
     del attrs, op_name  # Unused
     op_type_bytes = compat.as_bytes(op_type)
     is_v1_graph_mode = not ops.executing_eagerly_outside_functions()
     if (op_type_bytes in op_callbacks_common.OP_CALLBACK_SKIP_OPS
             or op_type_bytes in SAFE_OPS):
         return None
     if graph:
         # Under graph mode. Insert check_numerics op.
         instrumented_outputs = []
         if is_v1_graph_mode:
             for input_tensor in inputs:
                 if input_tensor in self._placeholder_to_debug_tensor and outputs:
                     outputs[0].op._add_control_input(  # pylint: disable=protected-access
                         self._placeholder_to_debug_tensor[input_tensor].op)
         for slot, output in enumerate(outputs):
             if (output.dtype.is_floating
                     and (op_type_bytes, slot) not in IGNORE_OP_OUTPUTS):
                 checked_output = array_ops.check_numerics_v2(
                     # TF v2 has automatic control dependencies added to stateful async
                     # ops, which allows us to run check_numerics asynchronously.
                     # In the above case we use debug_summary to reduce all output
                     # tensors asynchronously from the op being checked and then
                     # process the tensor summary with check_numerics.
                     output if is_v1_graph_mode else _debug_summary(output),
                     get_check_numerics_error_message(
                         slot,
                         len(outputs),
                         op_type,
                         output,
                         inputs,
                         graph=graph,
                         traceback=output.op.traceback))
                 _CHECK_NUMERICS_INPUT_LOOKUP[graph][
                     checked_output.name] = output
                 instrumented_outputs.append(
                     self._get_output_tensor(op_type_bytes, output,
                                             checked_output,
                                             is_v1_graph_mode))
             else:
                 instrumented_outputs.append(output)
         return instrumented_outputs
     else:
         if op_type_bytes == b"CheckNumericsV2":
             # TODO(b/140334369): Remove this special casing logic once op_callback.
             # automatically prevents infinite recursion in eager mode.
             return None
         # Under eager mode. Eagerly execute check_numerics op.
         for slot, output in enumerate(outputs):
             if (output.dtype.is_floating
                     and (op_type_bytes, slot) not in IGNORE_OP_OUTPUTS):
                 array_ops.check_numerics_v2(
                     output,
                     get_check_numerics_error_message(
                         slot,
                         len(outputs),
                         op_type,
                         output,
                         inputs,
                         stack_height_limit=self._stack_height_limit,
                         path_length_limit=self._path_length_limit))
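
This callback backs the public `tf.debugging.enable_check_numerics` API; a
hedged sketch of turning it on from user code (the sample values are
illustrative):

```python
# Hedged sketch: enable the numerics callback, then trip it on an Inf.
import tensorflow as tf

tf.debugging.enable_check_numerics()

try:
    y = tf.math.log(tf.constant([1.0, 0.0]))  # log(0) -> -inf triggers the check
except tf.errors.InvalidArgumentError as e:
    print("Caught numeric anomaly:", type(e).__name__)
```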
Exemplo n.º 32
 def __new__(cls, *args, **kwargs):  # pylint: disable=unused-argument
     eager_enabled = ops.executing_eagerly_outside_functions()
     cls = swap_class(cls, base_layer.Layer, base_layer_v1.Layer,
                      eager_enabled)
     return super(LayerVersionSelector, cls).__new__(cls)
Exemplo n.º 33
0
 def _use_collective_v2(self):
     if not ops.executing_eagerly_outside_functions():
         return False
     return CollectiveReplicaLauncher._prefer_collective_v2
Exemplo n.º 34
    def _distributed_apply(distribution, grads_and_vars, name, apply_state):
        """`apply_gradients` using a `DistributionStrategy`."""
        reduced_grads = distribution.extended.batch_reduce_to(
            ds_reduce_util.ReduceOp.SUM, grads_and_vars)
        var_list = [v for _, v in grads_and_vars]
        grads_and_vars = zip(reduced_grads, var_list)

        def apply_grad_to_update_var(var, grad):
            """Apply gradient to variable."""
            if isinstance(var, ops.Tensor):
                raise NotImplementedError("Trying to update a Tensor ", var)

            apply_kwargs = {}
            if not isinstance(var, de.TrainableWrapper):
                if isinstance(grad, ops.IndexedSlices):
                    if var.constraint is not None:
                        raise RuntimeError(
                            "Cannot use a constraint function on a sparse variable."
                        )
                    if "apply_state" in self._sparse_apply_args:
                        apply_kwargs["apply_state"] = apply_state
                    return self._resource_apply_sparse_duplicate_indices(
                        grad.values, var, grad.indices, **apply_kwargs)

                if "apply_state" in self._dense_apply_args:
                    apply_kwargs["apply_state"] = apply_state
                update_op = self._resource_apply_dense(grad, var,
                                                       **apply_kwargs)
                if var.constraint is not None:
                    with ops.control_dependencies([update_op]):
                        return var.assign(var.constraint(var))
                else:
                    return update_op
            else:
                with ops.colocate_with(None, ignore_existing=True):
                    _slots = [
                        self.get_slot(var, _s) for _s in self.get_slot_names()
                    ]
                    with ops.control_dependencies([grad]):
                        _before = [var.read_value()
                                   ] + [_s.read_value() for _s in _slots]
                    if isinstance(grad, ops.IndexedSlices):
                        if var.constraint is not None:
                            raise RuntimeError(
                                "Cannot use a constraint function on a sparse variable."
                            )
                        if "apply_state" in self._sparse_apply_args:
                            apply_kwargs["apply_state"] = apply_state
                        with ops.control_dependencies(_before):
                            _apply_op = self._resource_apply_sparse_duplicate_indices(
                                grad.values, var, grad.indices, **apply_kwargs)
                        with ops.control_dependencies([_apply_op]):
                            _after = control_flow_ops.group(
                                [var.update_op()] +
                                [_s.update_op() for _s in _slots])
                            return _after

                    if "apply_state" in self._dense_apply_args:
                        apply_kwargs["apply_state"] = apply_state
                    with ops.control_dependencies(_before):
                        update_op = self._resource_apply_dense(
                            grad, var, **apply_kwargs)
                    if var.constraint is not None:
                        with ops.control_dependencies([update_op]):
                            return var.assign(var.constraint(var))
                    else:
                        with ops.control_dependencies([update_op]):
                            _after = control_flow_ops.group(
                                [var.update_op()] +
                                [_s.update_op() for _s in _slots])
                        return _after

        update_ops = []
        with backend.name_scope(name or self._name):
            for grad, var in grads_and_vars:
                scope_name = ("update"
                              if ops.executing_eagerly_outside_functions() else
                              "update_" + var.op.name)
                # Colocate the update with variables to avoid unnecessary communication
                # delays. See b/136304694.
                with backend.name_scope(
                        scope_name), distribution.extended.colocate_vars_with(
                            var):
                    update_ops.extend(
                        distribution.extended.update(var,
                                                     apply_grad_to_update_var,
                                                     args=(grad, ),
                                                     group=False))

            any_symbolic = any(
                isinstance(i, ops.Operation) or tf_utils.is_symbolic_tensor(i)
                for i in update_ops)
            if not context.executing_eagerly() or any_symbolic:
                # If the current context is graph mode or any of the update ops are
                # symbolic then the step update should be carried out under a graph
                # context. (eager updates execute immediately)
                with ops._get_graph_from_inputs(update_ops).as_default():  # pylint: disable=protected-access
                    with ops.control_dependencies(update_ops):
                        return self._iterations.assign_add(1).op

            return self._iterations.assign_add(1)
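
A hedged sketch of the call path that lands in `_distributed_apply`: calling
`apply_gradients` from a replica context under a `tf.distribute` strategy
(assuming a TF 2.x build where `Strategy.run` is available; the variable, loss,
and optimizer are illustrative):

```python
# Hedged sketch: apply_gradients inside a strategy eventually dispatches to
# _distributed_apply as shown above.
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    v = tf.Variable(2.0)
    opt = tf.keras.optimizers.SGD(learning_rate=0.1)

@tf.function
def train_step():
    def replica_fn():
        with tf.GradientTape() as tape:
            loss = v * v
        grads = tape.gradient(loss, [v])
        opt.apply_gradients(zip(grads, [v]))
    strategy.run(replica_fn)

train_step()
```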
Exemplo n.º 35
def check_graph_consistency(tensor=None, method='add_loss', force_raise=False):
  """Checks that tensors passed to `add_*` method match the Keras graph.

  When one of the `add_*` method is called inside a V2 conditional branch,
  the underlying tensor gets created in a FuncGraph managed by control_flow_v2.
  We need to raise clear error messages in such cases.

  Arguments:
    tensor: Tensor to check, or `False` if it is known that an error
      should be raised.
    method: Caller method, one of {'add_metric', 'add_loss', 'add_update'}.
    force_raise: If an error should be raised regardless of `tensor`.

  Raises:
    RuntimeError: In case of an out-of-graph tensor.
  """
  if (force_raise or (ops.executing_eagerly_outside_functions() and
                      hasattr(tensor, 'graph') and
                      isinstance(tensor.graph,
                                 (control_flow_util_v2.CondBranchFuncGraph,
                                  control_flow_util_v2.WhileCondFuncGraph,
                                  control_flow_util_v2.WhileBodyFuncGraph)))):
    if method == 'add_metric':
      bad_example = """
      def call(self, inputs, training=None):
        if training:
          metric = compute_metric(inputs)
          self.add_metric(metric, name='my_metric', aggregation='mean')
        return inputs
      """
      correct_example = """
      def call(self, inputs, training=None):
        if training:
          metric = compute_metric(inputs)
        else:
          metric = 0.
        self.add_metric(metric, name='my_metric', aggregation='mean')
        return inputs
      """
    elif method == 'add_loss':
      bad_example = """
      def call(self, inputs, training=None):
        if training:
          loss = compute_loss(inputs)
          self.add_loss(loss)
        return inputs
      """
      correct_example = """
      def call(self, inputs, training=None):
        if training:
          loss = compute_loss(inputs)
        else:
          loss = 0.
        self.add_loss(loss)
        return inputs
      """
    else:
      bad_example = """
      def call(self, inputs, training=None):
        if training:
          self.add_update(self.w.assign_add(1))
        return inputs
      """
      correct_example = """
      def call(self, inputs, training=None):
        if training:
          increment = 1
        else:
          increment = 0
        self.add_update(self.w.assign_add(increment))
        return inputs
      """
    raise RuntimeError(
        'You are using the method `{method}` in a control flow branch '
        'in your layer, e.g.:\n{bad_example}\n'
        'This is not currently supported. '
        'You should either use static control flow (`tf.cond`) '
        'or move your call to {method} out of the control flow branch, '
        'e.g.:\n{correct_example}\n'
        'You can also resolve this by marking your layer '
        'as dynamic (eager-only) by passing '
        '`dynamic=True` to the layer constructor. '
        'Any kind of control flow is supported with dynamic layers. '
        'Note that using `dynamic=True` requires you '
        'to implement static shape inference '
        'in the `compute_output_shape(input_shape)` method.'.format(
            method=method,
            bad_example=bad_example,
            correct_example=correct_example))
Exemplo n.º 36
    def add_weight(self,
                   name,
                   shape,
                   dtype=None,
                   initializer=None,
                   regularizer=None,
                   trainable=None,
                   constraint=None,
                   use_resource=None,
                   synchronization=vs.VariableSynchronization.AUTO,
                   aggregation=vs.VariableAggregation.NONE,
                   partitioner=None,
                   **kwargs):
        """Adds a new variable to the layer, or gets an existing one; returns it.

    Arguments:
      name: variable name.
      shape: variable shape.
      dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
      initializer: initializer instance (callable).
      regularizer: regularizer instance (callable).
      trainable: whether the variable should be part of the layer's
        "trainable_variables" (e.g. variables, biases)
        or "non_trainable_variables" (e.g. BatchNorm mean, stddev).
        Note, if the current variable scope is marked as non-trainable
        then this parameter is ignored and any added variables are also
        marked as non-trainable. `trainable` defaults to `True` unless
        `synchronization` is set to `ON_READ`.
      constraint: constraint instance (callable).
      use_resource: Whether to use `ResourceVariable`.
      synchronization: Indicates when a distributed variable will be
        aggregated. Accepted values are constants defined in the class
        `tf.VariableSynchronization`. By default the synchronization is set to
        `AUTO` and the current `DistributionStrategy` chooses
        when to synchronize. If `synchronization` is set to `ON_READ`,
        `trainable` must not be set to `True`.
      aggregation: Indicates how a distributed variable will be aggregated.
        Accepted values are constants defined in the class
        `tf.VariableAggregation`.
      partitioner: (optional) partitioner instance (callable).  If
        provided, when the requested variable is created it will be split
        into multiple partitions according to `partitioner`.  In this case,
        an instance of `PartitionedVariable` is returned.  Available
        partitioners include `tf.compat.v1.fixed_size_partitioner` and
        `tf.compat.v1.variable_axis_size_partitioner`.  For more details, see
        the documentation of `tf.compat.v1.get_variable` and the  "Variable
        Partitioners and Sharding" section of the API guide.
      **kwargs: Additional keyword arguments.

    Returns:
      The created variable.  Usually either a `Variable` or `ResourceVariable`
      instance.  If `partitioner` is not `None`, a `PartitionedVariable`
      instance is returned.

    Raises:
      RuntimeError: If called with partitioned variable regularization and
        eager execution is enabled.
      ValueError: When trainable has been set to True with synchronization
        set as `ON_READ`.
    """
        for kwarg in kwargs:
            if kwarg != 'experimental_autocast':
                raise TypeError('Unknown keyword argument:', kwarg)
        if self._keras_style:
            return super(Layer, self).add_weight(
                name=name,
                shape=shape,
                dtype=dtype,
                initializer=initializer,
                regularizer=regularizer,
                trainable=trainable and self.trainable,
                constraint=constraint,
                use_resource=use_resource,
                synchronization=vs.VariableSynchronization.AUTO,
                aggregation=vs.VariableAggregation.NONE,
                partitioner=partitioner,
                **kwargs)

        if synchronization == vs.VariableSynchronization.ON_READ:
            if trainable:
                raise ValueError(
                    'Synchronization value can be set to '
                    'VariableSynchronization.ON_READ only for non-trainable variables. '
                    'You have specified trainable=True and '
                    'synchronization=VariableSynchronization.ON_READ.')
            else:
                # Set trainable to be false when variable is to be synced on read.
                trainable = False
        elif trainable is None:
            trainable = True

        def _should_add_regularizer(variable, existing_variable_set):
            if isinstance(variable, tf_variables.PartitionedVariable):
                for var in variable:
                    if var in existing_variable_set:
                        return False
                return True
            else:
                return variable not in existing_variable_set

        init_graph = None
        if not context.executing_eagerly():
            default_graph = ops.get_default_graph()
            if default_graph.building_function:
                with ops.init_scope():
                    # Retrieve the variables from the graph into which variables
                    # will be lifted; if initialization ops will be lifted into
                    # the eager context, then there is nothing to retrieve, since variable
                    # collections are not supported when eager execution is enabled.
                    if not context.executing_eagerly():
                        init_graph = ops.get_default_graph()
                        existing_variables = set(
                            tf_variables.global_variables())
            else:
                # Initialization ops will not be lifted out of the default graph.
                init_graph = default_graph
                existing_variables = set(tf_variables.global_variables())

        if dtype is None:
            dtype = self.dtype or dtypes.float32

        self._set_scope(None)
        reuse = self.built or self._reuse
        prev_len_trainable = len(self._trainable_weights)
        with vs.variable_scope(self._scope,
                               reuse=reuse,
                               auxiliary_name_scope=False) as scope:
            self._current_scope = scope
            with ops.name_scope(self._name_scope()):
                use_resource = (use_resource or self._use_resource_variables
                                or scope.use_resource)
                if initializer is None:
                    initializer = scope.initializer
                variable = super(Layer, self).add_weight(
                    name,
                    shape,
                    dtype=dtypes.as_dtype(dtype),
                    initializer=initializer,
                    trainable=trainable and self.trainable,
                    constraint=constraint,
                    partitioner=partitioner,
                    use_resource=use_resource,
                    synchronization=synchronization,
                    aggregation=aggregation,
                    getter=vs.get_variable,
                    **kwargs)

                if regularizer:
                    if (ops.executing_eagerly_outside_functions()
                            or _should_add_regularizer(variable,
                                                       existing_variables)):
                        self._handle_weight_regularization(
                            name, variable, regularizer)

                if init_graph is not None:
                    # Handle edge case where a custom getter has overridden `trainable`.
                    # There is one known occurrence of this, in unit test
                    # testBasicRNNCellNotTrainable in
                    # contrib.rnn.python.kernel_tests.core_rnn_cell_test
                    with init_graph.as_default():
                        trainable_variables = tf_variables.trainable_variables(
                        )
                    if (trainable and self.trainable
                            and variable not in trainable_variables):
                        # A custom getter / variable scope overrode the trainable flag.
                        extra_trainable_vars = self._trainable_weights[
                            prev_len_trainable:]
                        self._trainable_weights = self._trainable_weights[:
                                                                          prev_len_trainable]
                        self._non_trainable_weights += extra_trainable_vars
        return variable
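
For orientation, a hedged sketch of the same `add_weight` contract through the
public Keras `Layer` API (which omits the `partitioner` argument documented
above); the layer and names are illustrative:

```python
# Hedged sketch: add_weight in a small custom Keras layer.
import tensorflow as tf

class ScaleLayer(tf.keras.layers.Layer):

    def build(self, input_shape):
        self.scale = self.add_weight(
            name='scale', shape=(), initializer='ones', trainable=True)
        super(ScaleLayer, self).build(input_shape)

    def call(self, inputs):
        return inputs * self.scale

layer = ScaleLayer()
print(layer(tf.ones([2, 3])))  # the first call builds the layer and creates `scale`
```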
Exemplo n.º 37
def initialize_tpu_system(cluster_resolver=None):
  """Initialize the TPU devices.

  Args:
    cluster_resolver: A tf.distribute.cluster_resolver.TPUClusterResolver,
        which provides information about the TPU cluster.
  Returns:
    The tf.tpu.Topology object for the topology of the TPU cluster.

  Raises:
    RuntimeError: If no TPU devices are found for eager execution or if run in a
        tf.function.
  """
  job = None
  if cluster_resolver is None:
    # If no cluster resolver is specified, and running eagerly, execute the init
    # ops in the current device scope.
    if context.executing_eagerly():
      curr_device = device.DeviceSpec.from_string(context.context().device_name)
      if curr_device.job is not None:
        job = "{}/replica:0/task:0".format(curr_device.job)

    cluster_resolver = TPUClusterResolver("")
  assert isinstance(cluster_resolver, TPUClusterResolver)

  tpu_name = compat.as_text(cluster_resolver._tpu)  # pylint: disable=protected-access
  if tpu_name in _INITIALIZED_TPU_SYSTEMS:
    logging.warning(
        "TPU system %s has already been initialized. "
        "Reinitializing the TPU can cause previously created "
        "variables on TPU to be lost.", tpu_name)

  logging.info("Initializing the TPU system: %s", tpu_name)

  if context.executing_eagerly():
    # This function is written the way it is for the following non-intuitive reasons.
    # tpu.initialize_system creates a dummy op whose sole purpose is to trigger
    # DistributedTPURewritePass. This pass actually adds real ops that
    # initialize the TPU system. Thus, we can't simply run tpu.initialize_system
    # eagerly. We need to wrap it in defun and trigger the rewrite passes on it.
    if tpu_name not in _LOCAL_MASTERS:
      # Explicitly place the tpu.initialize_system op on the first worker to
      # avoid the "output node matches multiple devices" error.
      job = "{}/replica:0/task:0".format(cluster_resolver.get_job_name())

    @function.defun
    def _tpu_init_fn():
      # In TF1, we usually close chips when compilation fails to clear the data
      # in infeed. In TF2, we don't need to do this because infeed is no longer
      # used, so users can recover from TPU compilation failures more smoothly.
      return tpu.initialize_system(
          job=job, compilation_failure_closes_chips=False)

    # The TPU_SYSTEM device must match the device used in tpu.initialize_system
    # exactly, otherwise you can get errors if there are multiple TPU_SYSTEM
    # devices available.
    with ops.device(tpu._tpu_system_device_name(job)):  # pylint: disable=protected-access
      output = _tpu_init_fn()

    # Clear out the eager context caches since the memory is invalid now.
    logging.info("Clearing out eager caches")
    context.context()._clear_caches()  # pylint: disable=protected-access

    serialized_topology = output.numpy()

    # TODO(b/134094971): Remove this when lazy tensor copy in multi-device
    # function has been implemented.
    context.context().mirroring_policy = context.MIRRORING_ALL
  elif not ops.executing_eagerly_outside_functions():
    master = cluster_resolver.master()
    cluster_spec = cluster_resolver.cluster_spec()

    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
    if cluster_spec:
      session_config.cluster_def.CopyFrom(cluster_spec.as_cluster_def())

    with ops.Graph().as_default():
      with session_lib.Session(config=session_config, target=master) as sess:
        serialized_topology = sess.run(tpu.initialize_system())
  else:
    raise RuntimeError("initialize_tpu_system is not supported within "
                       "tf.functions.")

  logging.info("Finished initializing TPU system.")
  tpu_topology = topology.Topology(serialized=serialized_topology)
  _INITIALIZED_TPU_SYSTEMS[tpu_name] = tpu_topology

  return tpu_topology
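
A hedged sketch of the usual TF 2.x call sequence around
`initialize_tpu_system`; it only runs on a machine with TPU access, and the
empty TPU name assumes a Colab-style environment:

```python
# Hedged sketch: typical TPU bring-up; requires an actual TPU runtime.
import tensorflow as tf

resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)  # the function shown above
strategy = tf.distribute.experimental.TPUStrategy(resolver)
```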
Exemplo n.º 38
  def __exit__(self, error_type, unused_value, unused_traceback):
    if error_type:
      # Allow errors that occurred inside this context manager to pass through
      # normally.
      return

    # Only run in V2 Function mode.
    if (context.executing_eagerly() or
        not ops.executing_eagerly_outside_functions()):
      return

    if (self._graph is not ops.get_default_graph() or
        self._graph.name != 'keras_graph'):
      # Only auto-track updates when the Keras Graph is the only one used.
      return

    new_operations = self._graph.get_operations()[self._num_operations:]
    new_stateful_ops = set()

    # pylint: disable=protected-access
    for op in new_operations:
      # While loop is not supported in general for automatic control
      # dependencies.
      if control_flow_util.IsInWhileLoop(op):
        continue

      # Track stateful ops via `add_update`.
      is_stateful_op = (
          op.type not in self._graph._registered_ops or
          auto_control_deps.op_is_stateful(
              self._graph._registered_ops[op.type]))

      # Ignore ReadVariableOps as they do not need to be run separately.
      # This ensures existing Layers don't get extra updates.
      if is_stateful_op and op.type != 'ReadVariableOp':
        new_stateful_ops.add(op)

    explicit_updates = set(
        [u for u in self.layer._unfiltered_updates if not isinstance(u, tuple)])
    # pylint: enable=protected-access

    # Don't add updates that will already be run by virtue of being consumed by
    # other stateful ops or by the Layer's outputs. This ensures that existing
    # Layers like `BatchNormalization` continue to return the same values for
    # `.updates` calls.
    minimum_ops = set()
    targets = new_stateful_ops.union(
        set(nest.flatten(self.outputs)), explicit_updates)
    for op in new_stateful_ops:
      # Scrub any ops that are consumed by the outputs or other stateful ops.
      reachable = tf_utils.get_reachable_from_inputs(op)
      if not (targets - {op}).intersection(reachable):
        minimum_ops.add(op)
    new_stateful_ops = minimum_ops

    # Don't double-track updates added via explicitly calling `add_update`.
    # Also don't double-track updates already tracked in sublayers.
    new_stateful_ops = new_stateful_ops - explicit_updates

    # Decide whether to track as input-conditional or unconditional.
    input_reachable_ops = tf_utils.get_reachable_from_inputs(
        self.inputs, targets=new_stateful_ops)
    unconditional_updates = new_stateful_ops - input_reachable_ops
    conditional_updates = new_stateful_ops - unconditional_updates

    if unconditional_updates:
      self.layer.add_update(list(unconditional_updates))
    if conditional_updates:
      self.layer.add_update(list(conditional_updates), inputs=self.inputs)
Exemplo n.º 39
  def add_weight(self,
                 name,
                 shape,
                 dtype=None,
                 initializer=None,
                 regularizer=None,
                 trainable=None,
                 constraint=None,
                 use_resource=None,
                 synchronization=vs.VariableSynchronization.AUTO,
                 aggregation=vs.VariableAggregation.NONE,
                 partitioner=None,
                 **kwargs):
    """Adds a new variable to the layer, or gets an existing one; returns it.

    Arguments:
      name: variable name.
      shape: variable shape.
      dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
      initializer: initializer instance (callable).
      regularizer: regularizer instance (callable).
      trainable: whether the variable should be part of the layer's
        "trainable_variables" (e.g. variables, biases)
        or "non_trainable_variables" (e.g. BatchNorm mean, stddev).
        Note, if the current variable scope is marked as non-trainable
        then this parameter is ignored and any added variables are also
        marked as non-trainable. `trainable` defaults to `True` unless
        `synchronization` is set to `ON_READ`.
      constraint: constraint instance (callable).
      use_resource: Whether to use `ResourceVariable`.
      synchronization: Indicates when a distributed variable will be
        aggregated. Accepted values are constants defined in the class
        `tf.VariableSynchronization`. By default the synchronization is set to
        `AUTO` and the current `DistributionStrategy` chooses
        when to synchronize. If `synchronization` is set to `ON_READ`,
        `trainable` must not be set to `True`.
      aggregation: Indicates how a distributed variable will be aggregated.
        Accepted values are constants defined in the class
        `tf.VariableAggregation`.
      partitioner: (optional) partitioner instance (callable).  If
        provided, when the requested variable is created it will be split
        into multiple partitions according to `partitioner`.  In this case,
        an instance of `PartitionedVariable` is returned.  Available
        partitioners include `tf.fixed_size_partitioner` and
        `tf.variable_axis_size_partitioner`.  For more details, see the
        documentation of `tf.get_variable` and the  "Variable Partitioners
        and Sharding" section of the API guide.
      **kwargs: Additional keyword arguments.

    Returns:
      The created variable.  Usually either a `Variable` or `ResourceVariable`
      instance.  If `partitioner` is not `None`, a `PartitionedVariable`
      instance is returned.

    Raises:
      RuntimeError: If called with partitioned variable regularization and
        eager execution is enabled.
      ValueError: When trainable has been set to True with synchronization
        set as `ON_READ`.
    """
    for kwarg in kwargs:
      if kwarg != 'experimental_autocast':
        raise TypeError('Unknown keyword argument:', kwarg)
    if self._keras_style:
      return super(Layer, self).add_weight(
          name=name,
          shape=shape,
          dtype=dtype,
          initializer=initializer,
          regularizer=regularizer,
          trainable=trainable,
          constraint=constraint,
          use_resource=use_resource,
          synchronization=vs.VariableSynchronization.AUTO,
          aggregation=vs.VariableAggregation.NONE,
          partitioner=partitioner,
          **kwargs)

    if synchronization == vs.VariableSynchronization.ON_READ:
      if trainable:
        raise ValueError(
            'Synchronization value can be set to '
            'VariableSynchronization.ON_READ only for non-trainable variables. '
            'You have specified trainable=True and '
            'synchronization=VariableSynchronization.ON_READ.')
      else:
        # Set trainable to be false when variable is to be synced on read.
        trainable = False
    elif trainable is None:
      trainable = True

    def _should_add_regularizer(variable, existing_variable_set):
      if isinstance(variable, tf_variables.PartitionedVariable):
        for var in variable:
          if var in existing_variable_set:
            return False
        return True
      else:
        return variable not in existing_variable_set

    init_graph = None
    if not context.executing_eagerly():
      default_graph = ops.get_default_graph()
      if default_graph.building_function:
        with ops.init_scope():
          # Retrieve the variables from the graph into which variables
          # will be lifted; if initialization ops will be lifted into
          # the eager context, then there is nothing to retrieve, since variable
          # collections are not supported when eager execution is enabled.
          if not context.executing_eagerly():
            init_graph = ops.get_default_graph()
            existing_variables = set(tf_variables.global_variables())
      else:
        # Initialization ops will not be lifted out of the default graph.
        init_graph = default_graph
        existing_variables = set(tf_variables.global_variables())

    if dtype is None:
      dtype = self.dtype or dtypes.float32

    self._set_scope(None)
    reuse = self.built or self._reuse
    prev_len_trainable = len(self._trainable_weights)
    with vs.variable_scope(
        self._scope, reuse=reuse, auxiliary_name_scope=False) as scope:
      self._current_scope = scope
      with ops.name_scope(self._name_scope()):
        use_resource = (use_resource or
                        self._use_resource_variables or
                        scope.use_resource)
        if initializer is None:
          initializer = scope.initializer
        variable = super(Layer, self).add_weight(
            name,
            shape,
            dtype=dtypes.as_dtype(dtype),
            initializer=initializer,
            trainable=trainable,
            constraint=constraint,
            partitioner=partitioner,
            use_resource=use_resource,
            synchronization=synchronization,
            aggregation=aggregation,
            getter=vs.get_variable,
            **kwargs)

        if regularizer:
          if (ops.executing_eagerly_outside_functions()
              or _should_add_regularizer(variable, existing_variables)):
            self._handle_weight_regularization(name, variable, regularizer)

        if init_graph is not None:
          # Handle edge case where a custom getter has overridden `trainable`.
          # There is one known occurrence of this, in unit test
          # testBasicRNNCellNotTrainable in
          # contrib.rnn.python.kernel_tests.core_rnn_cell_test
          with init_graph.as_default():
            trainable_variables = tf_variables.trainable_variables()
          if (trainable and self.trainable and
              variable not in trainable_variables):
            # A custom getter / variable scope overrode the trainable flag.
            extra_trainable_vars = self._trainable_weights[prev_len_trainable:]
            self._trainable_weights = self._trainable_weights[
                :prev_len_trainable]
            self._non_trainable_weights += extra_trainable_vars
    return variable
Exemplo n.º 40
    def __call__(self, inputs, *args, **kwargs):
        """Wraps `call`, applying pre- and post-processing steps.

    Args:
      inputs: input tensor(s).
      *args: additional positional arguments to be passed to `self.call`.
      **kwargs: additional keyword arguments to be passed to `self.call`.
        **Note**: kwarg `scope` is reserved for use by the layer.

    Returns:
      Output tensor(s).

    Note:
      - If the layer's `call` method takes a `scope` keyword argument,
        this argument will be automatically set to the current variable scope.
      - If the layer's `call` method takes a `mask` argument (as some Keras
        layers do), its default value will be set to the mask generated
        for `inputs` by the previous layer (if `inputs` did come from
        a layer that generated a corresponding mask, i.e. if it came from
        a Keras layer with masking support).

    Raises:
      ValueError: if the layer's `call` method returns None (an invalid value).
    """
        scope = kwargs.pop('scope', None)

        if self._keras_style:
            if scope is not None:
                raise ValueError(
                    'scope argument not allowed when keras style layers are enabled, '
                    'but saw: {}'.format(scope))
            return super(Layer, self).__call__(inputs, *args, **kwargs)

        self._set_scope(scope)

        if self.built:
            try:
                # Some classes which inherit from Layer do not use its constructor, so
                # rather than initializing to None we check for an AttributeError.
                scope_context_manager = self._always_reuse_variable_scope  # pylint: disable=access-member-before-definition
            except AttributeError:
                scope_context_manager = None

            if scope_context_manager is None:
                # From this point we will always set reuse=True, so create a "final"
                # variable scope with this setting. We avoid re-creating variable scopes
                # after this point as an optimization.
                scope_context_manager = vs.variable_scope(
                    self._scope, reuse=True, auxiliary_name_scope=False)

                # Do not cache variable scopes if Eager mode is enabled. If Eager mode
                # is enabled then we don't want to reuse scopes because the cached scope
                # might be from a FuncGraph or Eager scope we are no longer in.
                if not ops.executing_eagerly_outside_functions():
                    self._always_reuse_variable_scope = scope_context_manager
        else:
            scope_context_manager = vs.variable_scope(
                self._scope, reuse=self._reuse, auxiliary_name_scope=False)

        with scope_context_manager as scope:
            self._current_scope = scope

            try:
                call_has_scope_arg = self._call_has_scope_arg
            except AttributeError:
                self._call_fn_args = variable_scope_shim.fn_args(self.call)
                self._call_has_scope_arg = 'scope' in self._call_fn_args
                call_has_scope_arg = self._call_has_scope_arg
            if call_has_scope_arg:
                kwargs['scope'] = scope

            # Actually call layer
            outputs = super(Layer, self).__call__(inputs, *args, **kwargs)

        if not context.executing_eagerly():
            # Update global default collections.
            _add_elements_to_collection(self.updates, ops.GraphKeys.UPDATE_OPS)
        return outputs
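
A hedged sketch of the reuse behavior this `__call__` implements for
non-Keras-style layers: calling the same layer object twice builds the
variables once and reuses them on the second call (graph mode; shapes are
illustrative):

```python
# Hedged sketch: variable reuse across repeated calls of a v1 layer.
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    x1 = tf.compat.v1.placeholder(tf.float32, [None, 4])
    x2 = tf.compat.v1.placeholder(tf.float32, [None, 4])
    dense = tf.compat.v1.layers.Dense(3)
    y1 = dense(x1)  # first call: creates kernel and bias under the layer scope
    y2 = dense(x2)  # second call: re-enters the scope with reuse=True
    print(len(tf.compat.v1.trainable_variables()))  # 2 -- shared between calls
```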
Exemplo n.º 41
def _instrument_symbolic_tensors(tensors, op_name, tfdbg_context_id):
  """Add debugging instrumentation for symbolic (i.e., non-eager) tensors.

  The detailed fashion in which the tensors are instrumented is determined
  by the tensor_debug_mode configured for the currently enabled dumping
  callback.

  Args:
    tensors: A tuple of Tensors to instrument. It is assumed that their ordering
      corresponds to the ordering of output tensors of an original op. Output
      slot indices (0-based) will be generated based on the ordering.
    op_name: Name of the op that emits the Tensors.
    tfdbg_context_id: A unique ID for the context that the op belongs to (e.g.,
      a graph).

  Returns:
    Non-eager Tensors that override the `tensors` as the output of the op
    that originally generated `tensors`. In some cases (e.g., non-V1 graph
    mode), this may be `None`, as the instrumentation can simply rely on
    automatic control dependencies (see `auto_control_deps.py`) instead of
    tensor overriding.
  """
  tensor_debug_mode = _state.config.tensor_debug_mode
  debug_urls = ["file://%s" % _state.config.dump_root]
  is_v1_graph_mode = not ops.executing_eagerly_outside_functions()
  instrumented_tensors = [] if is_v1_graph_mode else None
  if tensor_debug_mode == debug_event_pb2.TensorDebugMode.NO_TENSOR:
    for output_slot, tensor in enumerate(tensors):
      with ops.colocate_with(None, ignore_existing=True):
        # Except in V1 graph mode + control flow, debug_identity_v2 triggers
        # auto control dependencies because it's a stateful op.
        debug_tensor = gen_debug_ops.debug_identity_v2(
            # Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode
            # as a low-overhead placeholder, since no actual tensor value is
            # traced.
            constant_op.constant([], dtype=dtypes.float32),
            tfdbg_context_id=tfdbg_context_id,
            op_name=op_name,
            output_slot=output_slot,
            tensor_debug_mode=_state.config.tensor_debug_mode,
            debug_urls=debug_urls)
        if is_v1_graph_mode:
          # TODO(cais): Evaluate performance optimization options. For the
          # `NO_TENSOR` debug mode, an alternative is to add `debug_tensor` as a
          # control dependency of `tensor.op` without an additional identity op.
          identity = array_ops.identity(tensor)
          identity.op._add_control_input(  # pylint: disable=protected-access
              debug_tensor.op)
          instrumented_tensors.append(identity)
    return instrumented_tensors
  elif tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR:
    for output_slot, tensor in enumerate(tensors):
      with ops.colocate_with(None, ignore_existing=True):
        debug_tensor = gen_debug_ops.debug_identity_v2(
            tensor,
            tfdbg_context_id=tfdbg_context_id,
            op_name=op_name,
            output_slot=output_slot,
            tensor_debug_mode=_state.config.tensor_debug_mode,
            debug_urls=debug_urls)
        if is_v1_graph_mode:
          instrumented_tensors.append(debug_tensor)
    return instrumented_tensors
  else:
    raise NotImplementedError(
        "Symbolic tensor instrumentation is not implemented for debug mode %s" %
        _state.config.tensor_debug_mode)
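
This instrumentation is driven by the dumping callback that
`tf.debugging.experimental.enable_dump_debug_info` installs; a hedged sketch
(the dump directory is a placeholder):

```python
# Hedged sketch: turn on tfdbg2 dumping; graph tensors are then instrumented
# roughly as shown above. '/tmp/tfdbg2_dump' is an illustrative path.
import tensorflow as tf

tf.debugging.experimental.enable_dump_debug_info(
    '/tmp/tfdbg2_dump', tensor_debug_mode='NO_TENSOR', circular_buffer_size=-1)

@tf.function
def f(x):
  return tf.math.log(x) + 1.0

f(tf.constant([1.0, 2.0]))  # ops inside the tf.function graph get instrumented
```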
Exemplo n.º 42
def eager_py_func(func, inp, Tout, name=None):
  """Wraps a python function into a TensorFlow op that executes it eagerly.

  This function allows expressing computations in a TensorFlow graph as
  Python functions. In particular, it wraps a Python function `func`
  in a once-differentiable TensorFlow operation that executes it with eager
  execution enabled. As a consequence, `tf.py_function` makes it
  possible to express control flow using Python constructs (`if`, `while`,
  `for`, etc.), instead of TensorFlow control flow constructs (`tf.cond`,
  `tf.while_loop`). For example, you might use `tf.py_function` to
  implement the log huber function:

  ```python
  def log_huber(x, m):
    if tf.abs(x) <= m:
      return x**2
    else:
      return m**2 * (1 - 2 * tf.math.log(m) + tf.math.log(x**2))

  x = tf.compat.v1.placeholder(tf.float32)
  m = tf.compat.v1.placeholder(tf.float32)

  y = tf.py_function(func=log_huber, inp=[x, m], Tout=tf.float32)
  dy_dx = tf.gradients(y, x)[0]

  with tf.compat.v1.Session() as sess:
    # The session executes `log_huber` eagerly. Given the feed values below,
    # it will take the first branch, so `y` evaluates to 1.0 and
    # `dy_dx` evaluates to 2.0.
    y, dy_dx = sess.run([y, dy_dx], feed_dict={x: 1.0, m: 2.0})
  ```

  You can also use `tf.py_function` to debug your models at runtime
  using Python tools, i.e., you can isolate portions of your code that
  you want to debug, wrap them in Python functions and insert `pdb` tracepoints
  or print statements as desired, and wrap those functions in
  `tf.py_function`.

  For more information on eager execution, see the
  [Eager guide](https://tensorflow.org/guide/eager).

  `tf.py_function` is similar in spirit to `tf.compat.v1.py_func`, but unlike
  the latter, the former lets you use TensorFlow operations in the wrapped
  Python function. In particular, while `tf.compat.v1.py_func` only runs on
  CPUs and wraps functions that take NumPy arrays as inputs and return NumPy
  arrays as outputs, `tf.py_function` can be placed on GPUs and wraps functions
  that take Tensors as inputs, execute TensorFlow operations in their bodies,
  and return Tensors as outputs.

  Like `tf.compat.v1.py_func`, `tf.py_function` has the following limitations
  with respect to serialization and distribution:

  * The body of the function (i.e. `func`) will not be serialized in a
    `GraphDef`. Therefore, you should not use this function if you need to
    serialize your model and restore it in a different environment.

  * The operation must run in the same address space as the Python program
    that calls `tf.py_function()`. If you are using distributed
    TensorFlow, you must run a `tf.distribute.Server` in the same process as the
    program that calls `tf.py_function()` and you must pin the created
    operation to a device in that server (e.g. using `with tf.device():`).


  Args:
    func: A Python function that accepts `inp` as arguments, and returns a
      value (or list of values) whose type is described by `Tout`.

    inp: Input arguments for `func`.  A list whose elements are `Tensor`s or
      `CompositeTensors` (such as `tf.RaggedTensor`); or a single `Tensor` or
      `CompositeTensor`.

    Tout: The type(s) of the value(s) returned by `func`.  One of the
      following.

      * If `func` returns a `Tensor` (or a value that can be converted to a
        Tensor): the `tf.DType` for that value.
      * If `func` returns a `CompositeTensor`: The `tf.TypeSpec` for that value.
      * If `func` returns `None`: the empty list (`[]`).
      * If `func` returns a list of `Tensor` and `CompositeTensor` values:
        a corresponding list of `tf.DType`s and `tf.TypeSpec`s for each value.

    name: A name for the operation (optional).

  Returns:
    The value(s) computed by `func`: a `Tensor`, `CompositeTensor`, or list of
    `Tensor` and `CompositeTensor`; or an empty list if `func` returns `None`.
  """
  if ops.executing_eagerly_outside_functions():
    with ops.device(context.context().host_address_space()):
      return _internal_py_func(
          func=func, inp=inp, Tout=Tout, use_eager_py_func=True, name=name)

  return _internal_py_func(
      func=func, inp=inp, Tout=Tout, use_eager_py_func=True, name=name)
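# Complementary TF2-style sketch (not part of the source above): the same
# `tf.py_function` API used inside a `tf.function`. Function names here are
# illustrative.
import tensorflow as tf


def log_huber_eager(x, m):
  # Plain Python control flow; executed eagerly by tf.py_function.
  if tf.abs(x) <= m:
    return x ** 2
  return m ** 2 * (1 - 2 * tf.math.log(m) + tf.math.log(x ** 2))


@tf.function
def log_huber_graph(x, m):
  return tf.py_function(func=log_huber_eager, inp=[x, m], Tout=tf.float32)


print(log_huber_graph(tf.constant(1.0), tf.constant(2.0)))  # ~1.0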
Exemplo n.º 43
0
def py_func_common(func, inp, Tout, stateful=True, name=None):
  """Wraps a python function and uses it as a TensorFlow op.

  Given a python function `func`, which takes numpy arrays as its
  arguments and returns numpy arrays as its outputs, wrap this function as an
  operation in a TensorFlow graph. The following snippet constructs a simple
  TensorFlow graph that invokes the `np.sinh()` NumPy function as an operation
  in the graph:

  ```python
  def my_func(x):
    # x will be a numpy array with the contents of the placeholder below
    return np.sinh(x)
  input = tf.compat.v1.placeholder(tf.float32)
  y = tf.compat.v1.py_func(my_func, [input], tf.float32)
  ```

  **N.B.** The `tf.compat.v1.py_func()` operation has the following known
  limitations:

  * The body of the function (i.e. `func`) will not be serialized in a
    `GraphDef`. Therefore, you should not use this function if you need to
    serialize your model and restore it in a different environment.

  * The operation must run in the same address space as the Python program
    that calls `tf.compat.v1.py_func()`. If you are using distributed
    TensorFlow, you must run a `tf.distribute.Server` in the same process as
    the program that calls `tf.compat.v1.py_func()`, and you must pin the
    created operation to a device in that server (e.g. using
    `with tf.device():`).

  Note: The operation produces tensors of unknown shape and rank, because
    shape inference does not work on arbitrary Python code. If you need the
    shape, set it explicitly based on statically available information, e.g.:

    ```python
    import tensorflow as tf
    import numpy as np

    def make_synthetic_data(i):
        return np.cast[np.uint8](i) * np.ones(
            [20, 256, 256, 3], dtype=np.float32) / 10.

    def preprocess_fn(i):
        ones = tf.py_function(make_synthetic_data, [i], tf.float32)
        ones.set_shape(tf.TensorShape([None, None, None, None]))
        ones = tf.image.resize(ones, [224, 224])
        return ones

    ds = tf.data.Dataset.range(10)
    ds = ds.map(preprocess_fn)
    ```

  Args:
    func: A Python function, which accepts `ndarray` objects as arguments and
      returns a list of `ndarray` objects (or a single `ndarray`). This function
      must accept as many arguments as there are tensors in `inp`, and these
      argument types will match the corresponding `tf.Tensor` objects in `inp`.
      The returned `ndarray`s must match the number and types defined in
      `Tout`.
      Important Note: Input and output NumPy `ndarray`s of `func` are not
        guaranteed to be copies. In some cases their underlying memory will be
        shared with the corresponding TensorFlow tensors. In-place
        modification, or storing `func` inputs or return values in Python data
        structures without an explicit copy (e.g. `np.copy`), can have
        non-deterministic consequences.
    inp: A list of `Tensor` objects.
    Tout: A list or tuple of tensorflow data types or a single tensorflow data
      type if there is only one, indicating what `func` returns.
    stateful: (Boolean.) If True, the function should be considered stateful. If
      a function is stateless, when given the same input it will return the same
      output and have no observable side effects. Optimizations such as common
      subexpression elimination are only performed on stateless operations.
    name: A name for the operation (optional).

  Returns:
    A list of `Tensor` or a single `Tensor` which `func` computes.

  @compatibility(TF2)

  This name was deprecated and removed in TF2, but `tf.numpy_function` is a
  near-exact replacement; just drop the `stateful` argument (all
  `tf.numpy_function` calls are considered stateful). It is compatible with
  eager execution and `tf.function`.

  `tf.py_function` is a close but not an exact replacement, passing TensorFlow
  tensors to the wrapped function instead of NumPy arrays, which provides
  gradients and can take advantage of accelerators.

  Before:

  >>> def fn_using_numpy(x):
  ...   x[0] = 0.
  ...   return x
  >>> tf.compat.v1.py_func(fn_using_numpy, inp=[tf.constant([1., 2.])],
  ...     Tout=tf.float32, stateful=False)
  <tf.Tensor: shape=(2,), dtype=float32, numpy=array([0., 2.], dtype=float32)>

  After:

  >>> tf.numpy_function(fn_using_numpy, inp=[tf.constant([1., 2.])],
  ...     Tout=tf.float32)
  <tf.Tensor: shape=(2,), dtype=float32, numpy=array([0., 2.], dtype=float32)>

  @end_compatibility

  """
  if context.executing_eagerly():
    result = func(*[np.array(x) for x in inp])
    result = nest.flatten(result)

    result = [x if x is None else ops.convert_to_tensor(x) for x in result]
    if len(result) == 1:
      # Mimic the automatic unwrapping in graph-mode py_func
      result, = result
    return result

  if ops.executing_eagerly_outside_functions():
    with ops.device(context.context().host_address_space()):
      return _internal_py_func(
          func=func,
          inp=inp,
          Tout=Tout,
          stateful=stateful,
          use_eager_py_func=False,
          name=name)

  return _internal_py_func(
      func=func,
      inp=inp,
      Tout=Tout,
      stateful=stateful,
      use_eager_py_func=False,
      name=name)
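# Hedged usage sketch (not from the source above): the TF2 replacement
# `tf.numpy_function` combined with `set_shape`, mirroring the shape-setting
# note in the docstring. The loader function and shapes are illustrative.
import numpy as np
import tensorflow as tf


def load_example(i):
  # Receives a NumPy scalar, returns a NumPy array.
  return np.full((32, 32, 3), float(i), dtype=np.float32)


def map_fn(i):
  img = tf.numpy_function(load_example, [i], tf.float32)
  # Shape inference cannot see inside the Python function, so set it manually.
  img.set_shape([32, 32, 3])
  return img


ds = tf.data.Dataset.range(4).map(map_fn)
for img in ds:
  print(img.shape)  # (32, 32, 3)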
    def _initialize_local(self, cluster_resolver, devices=None):
        """Initializes the object for local training."""
        self._is_chief = True
        self._num_workers = 1

        if ops.executing_eagerly_outside_functions():
            try:
                context.context().configure_collective_ops(
                    scoped_allocator_enabled_ops=("CollectiveReduce", ))
            except RuntimeError:
                logging.warning(
                    "Collective ops is not configured at program startup. "
                    "Some performance features may not be enabled.")
            self._collective_ops_configured = True

        if devices:
            local_devices = devices
            if "GPU" in devices[0]:
                local_device_type = "GPU"
            elif "TPU" in devices[0]:
                local_device_type = "TPU"
            else:
                local_device_type = "CPU"
        else:
            local_devices, local_device_type = self._initialize_local_devices(
                cluster_resolver, worker_device="")

        self._worker_device = device_util.canonicalize("/device:CPU:0")
        self._host_input_device = numpy_dataset.SingleDevice(
            self._worker_device)

        self._collective_keys = cross_device_utils.CollectiveKeys(
            group_key_start=1 + self._collective_key_base)
        self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
            devices=local_devices,
            group_size=len(local_devices),
            collective_keys=self._collective_keys)
        # CrossDeviceOps for per host tensors.
        self._host_cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
            devices=[self._worker_device],
            group_size=self._num_workers,
            collective_keys=self._collective_keys)
        super(CollectiveAllReduceExtended,
              self)._initialize_single_worker(local_devices)

        self._cluster_spec = None
        self._task_type = None
        self._task_id = None
        self._id_in_cluster = 0

        # This is a marker indicating whether we are running with a standalone
        # client or an independent worker. Right now, with a standalone client,
        # the strategy object is created as a local strategy and then turned
        # into a multi-worker strategy via a `configure` call.
        self._local_or_standalone_client_mode = True

        # Save the num_devices_per_worker and rpc_layer for configure method.
        self._num_devices_per_worker = len(local_devices)
        self._local_device_type = local_device_type
        self._rpc_layer = cluster_resolver.rpc_layer
        self._warn_nccl_no_gpu()

        logging.info(
            "Single-worker MultiWorkerMirroredStrategy with local_devices "
            "= %r, communication = %s", local_devices,
            self._communication_options.implementation)
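# Hedged usage sketch (public API, not part of the class above): with no
# TF_CONFIG cluster spec in the environment, MultiWorkerMirroredStrategy
# initializes through a single-worker/local-devices path like the one shown.
# Assumes TF 2.4+, where the strategy is available under tf.distribute.
import tensorflow as tf

strategy = tf.distribute.MultiWorkerMirroredStrategy()
print("replicas in sync:", strategy.num_replicas_in_sync)

with strategy.scope():
  model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
  model.compile(optimizer="sgd", loss="mse")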
Exemplo n.º 45
0
# Note: in its original file this helper is presumably decorated as a context
# manager (so it can be used in a `with` block); a standard-library decorator
# is added here to make the snippet usable on its own.
import contextlib


@contextlib.contextmanager
def maybe_init_scope():
    if ops.executing_eagerly_outside_functions():
        yield
    else:
        with ops.init_scope():
            yield
 def _check_layer_class(self, layer):
     if ops.executing_eagerly_outside_functions():
         self.assertIsInstance(layer, base_layer.Layer)
         self.assertNotIsInstance(layer, base_layer_v1.Layer)
     else:
         self.assertIsInstance(layer, base_layer_v1.Layer)
Exemplo n.º 47
0
def save_model(model,
               filepath,
               overwrite=True,
               include_optimizer=True,
               save_format=None):
  """Saves a model as a TensorFlow SavedModel or HDF5 file.

  The saved model contains:
      - the model's configuration (topology)
      - the model's weights
      - the model's optimizer's state (if any)

  Thus the saved model can be reinstantiated in
  the exact same state, without any of the code
  used for model definition or training.

  Arguments:
      model: Keras model instance to be saved.
      filepath: One of the following:
        - String, path where to save the model
        - `h5py.File` object where to save the model
      overwrite: Whether we should overwrite any existing model at the target
        location, or instead ask the user with a manual prompt.
      include_optimizer: If True, save optimizer's state together.
      save_format: Either 'tf' or 'h5', indicating whether to save the model
        to Tensorflow SavedModel or HDF5. The 'tf' option is currently disabled,
        and will be enabled when Keras SavedModel export is no longer
        experimental. (The experimental function is
        tf.keras.experimental.export_saved_model).

  Raises:
      ImportError: If save format is hdf5, and h5py is not available.
  """
  from tensorflow.python.keras.engine import sequential  # pylint: disable=g-import-not-at-top

  if (not tf2.enabled() and
      not ops.executing_eagerly_outside_functions()
      and save_format == 'tf'):
    raise NotImplementedError(
        'Saving the model as SavedModel is not supported in TensorFlow 1.X '
        'graph mode. Please enable eager execution or use the "h5" save '
        'format.')

  if _KERAS_SAVED_MODEL_STILL_EXPERIMENTAL and save_format == 'tf':
    raise NotImplementedError(
        'Saving the model as SavedModel is still in experimental stages. '
        'Please use tf.keras.experimental.export_saved_model, or use '
        'save_format="h5" to save to HDF5.')

  # TODO(kathywu): Remove this when Keras SavedModel is not experimental.
  save_format = 'h5'

  if (save_format == 'h5' or
      (h5py is not None and isinstance(filepath, h5py.File)) or
      os.path.splitext(filepath)[1] in _HDF5_EXTENSIONS):
    # TODO(b/130258301): add utility method for detecting model type.
    if (not model._is_graph_network and  # pylint:disable=protected-access
        not isinstance(model, sequential.Sequential)):
      raise NotImplementedError(
          'Saving the model to HDF5 format requires the model to be a '
          'Functional model or a Sequential model. It does not work for '
          'subclassed models, because such models are defined via the body of '
          'a Python method, which isn\'t safely serializable. Consider saving '
          'to the Tensorflow SavedModel format (by setting save_format="tf") '
          'or using `save_weights`.')
    hdf5_format.save_model_to_hdf5(
        model, filepath, overwrite, include_optimizer)
    return
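# Small usage sketch consistent with the snippet above, where only the HDF5
# path is active (the path below is illustrative; saving to HDF5 requires the
# h5py package).
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(3,))])
model.compile(optimizer="sgd", loss="mse")

# The '.h5' extension (or save_format='h5') selects the HDF5 branch above.
model.save("/tmp/my_model.h5")
restored = tf.keras.models.load_model("/tmp/my_model.h5")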
Exemplo n.º 48
0
 def _build_graph_network_for_inferred_shape(self,
                                             input_shape,
                                             input_dtype=None):
   if input_shape is None or not self.layers:
     return
   if not tf2.enabled() or not ops.executing_eagerly_outside_functions():
     # This behavior is disabled in V1 or when eager execution is disabled.
     return
   if (not self._has_explicit_input_shape and
       not self._use_legacy_deferred_behavior):
     # Determine whether the input shape is novel, i.e. whether the model
     # should be rebuilt.
     input_shape = tuple(input_shape)
     if self._inferred_input_shape is None:
       new_shape = input_shape
     else:
       new_shape = relax_input_shape(self._inferred_input_shape, input_shape)
     if (new_shape is not None and new_shape != self._inferred_input_shape):
       # A novel shape has been received: we need to rebuild the model.
       # In case we are inside a graph function, we step out of it.
       with ops.init_scope():
         inputs = input_layer.Input(
             batch_shape=new_shape,
             dtype=input_dtype,
             name=self.layers[0].name + '_input')
         layer_input = inputs
         created_nodes = set()
         for layer in self.layers:
           # Clear nodes previously created via this method. This prevents
           # node accumulation and ensures that e.g. `layer.output` is
           # always connected to `model.inputs`
           # (this is important e.g. for the feature extraction use case).
           # We don't just do `layer._inbound_nodes = []` in order
           # not to break shared layers added to Sequential models (which is
           # technically illegal as per the `add()` docstring,
           # but wasn't previously disabled).
           clear_previously_created_nodes(layer, self._created_nodes)
           try:
             # Create Functional API connection by calling the current layer
             layer_output = layer(layer_input)
           except:  # pylint:disable=bare-except
             # Functional API calls may fail for a number of reasons:
             # 1) The layer may be buggy. In this case it will be easier for
             # the user to debug if we fail on the first call on concrete data,
             # instead of our own call on a symbolic input.
             # 2) The layer is dynamic (graph-incompatible) and hasn't
             # overridden `compute_output_shape`. In this case, it is
             # impossible to build a graph network.
             # 3) The layer is otherwise incompatible with the Functional API
             # (e.g. this is the case for some probabilistic layers that rely
             # on hacks and that do not return tensors).
             # In all these cases, we should avoid creating a graph network
             # (or we simply can't).
             self._use_legacy_deferred_behavior = True
             return
           if len(nest.flatten(layer_output)) != 1:
             raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG)
           # Keep track of nodes just created above
           track_nodes_created_by_last_call(layer, created_nodes)
           layer_input = layer_output
           outputs = layer_output
         self._created_nodes = created_nodes
         try:
           # Initialize a graph Network. This call will never fail for
           # a stack of valid Keras layers.
           # However some users have layers that are fundamentally incompatible
           # with the Functional API, which do not return tensors. In this
           # case, we fall back to the legacy deferred behavior.
           # TODO(fchollet): consider raising here, as we should not be
           # supporting such layers.
           self._init_graph_network(inputs, outputs)
           self._graph_initialized = True
         except:  # pylint:disable=bare-except
           self._use_legacy_deferred_behavior = True
       self._inferred_input_shape = new_shape
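# Illustrative sketch (public API) of the deferred-build behavior this method
# implements: a Sequential model with no explicit input shape is rebuilt as a
# graph network the first time it sees concrete data.
import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(4, activation="relu"),
    tf.keras.layers.Dense(1),
])
print(model.built)   # False: no input shape has been seen yet

# The first call with concrete data lets Sequential infer the input shape and
# initialize its internal graph network, as in the method above.
_ = model(np.zeros((2, 8), dtype="float32"))
print(model.built)   # True: weights were created for input dim 8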