Example #1
def _SwitchRefOrTensor(data, pred, name="Switch"):
  """Forwards `data` to an output determined by `pred`.

  If `pred` is true, the `data` input is forwarded to the first output.
  Otherwise, the data goes to the second output.

  This op handles `Tensor`s and `IndexedSlices`.

  Args:
    data: The tensor to be forwarded to the appropriate output.
    pred: A scalar that specifies which output port will receive data.
    name: A name for this operation (optional).

  Returns:
    `(output_false, output_true)`: If `pred` is true, data will be forwarded to
    `output_true`, otherwise it goes to `output_false`.

  Raises:
    TypeError: if data is not a Tensor or IndexedSlices
  """
  data = ops.convert_to_tensor_or_indexed_slices(data, name="data")
  if isinstance(data, ops.Tensor):
    if not data.dtype.is_ref_dtype:
      return switch(data, pred, name=name)
    else:
      return ref_switch(data, pred, name=name)
  else:
    return switch(data, pred, name=name)
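
For orientation, here is a minimal usage sketch of the `switch` primitive that `_SwitchRefOrTensor` wraps. It assumes a TF 1.x graph-mode environment; `control_flow_ops` is an internal module, so the import path may differ between versions.

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

data = tf.constant([1.0, 2.0, 3.0])
pred = tf.placeholder(tf.bool, shape=[])

# switch() returns (output_false, output_true); only the branch selected by
# `pred` actually receives the data, the other output is a "dead" tensor.
output_false, output_true = control_flow_ops.switch(data, pred)

with tf.Session() as sess:
  # With pred=True, `data` is forwarded to output_true.
  print(sess.run(output_true, feed_dict={pred: True}))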
Example #2
    def update(v, g):
      """Apply gradients to a replica variable."""
      assert v is not None

      try:
        # Convert the grad to Tensor or IndexedSlices if necessary.
        g = ops.convert_to_tensor_or_indexed_slices(g)
      except TypeError:
        raise TypeError("Gradient must be convertible to a Tensor"
                        " or IndexedSlices, or None: %s" % g)
      if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
        raise TypeError(
            "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
      p = _get_processor(v)

      if context.executing_eagerly() or (
          resource_variable_ops.is_resource_variable(v) and
          not v._in_graph_mode):  # pylint: disable=protected-access
        scope_name = v.name.split(":")[0]
      else:
        scope_name = v.op.name

      # device_policy is set because non-mirrored tensors will be read in
      # `update_op`; `_resource_apply_dense` reading `lr_t`, `beta1_t` and
      # `beta2_t` is an example.
      with ops.name_scope("update_" + scope_name):
        return p.update_op(self, g)
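
All of the snippets in this listing funnel their inputs through `ops.convert_to_tensor_or_indexed_slices`. A small sketch of its behavior, assuming TF 1.x where it lives in `tensorflow.python.framework.ops`:

import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops

# Plain arrays (and anything tensor-convertible) come back as a Tensor.
t = ops.convert_to_tensor_or_indexed_slices(np.array([1.0, 2.0]))
print(isinstance(t, ops.Tensor))            # True

# IndexedSlices pass through unchanged instead of being densified.
slices = tf.IndexedSlices(values=tf.ones([2, 3]), indices=tf.constant([0, 4]))
s = ops.convert_to_tensor_or_indexed_slices(slices)
print(isinstance(s, tf.IndexedSlices))      # True

# Non-convertible objects raise, which callers such as apply_gradients()
# catch and re-raise with a friendlier message.
try:
  ops.convert_to_tensor_or_indexed_slices(object())
except (TypeError, ValueError) as e:
  print("not convertible:", e)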
Example #3
def with_dependencies(dependencies, output_tensor, name=None):
  """Produces the content of `output_tensor` only after `dependencies`.

  In some cases, a user may want the output of an operation to be
  consumed externally only after some other dependencies have run
  first. This function returns `output_tensor`, but only after all
  operations in `dependencies` have run. Note that this means there is
  no guarantee that `output_tensor` itself will be computed after any of the
  `dependencies` have run.

  See also `tuple` and `group`.

  Args:
    dependencies: A list of operations to run before this op finishes.
    output_tensor: A `Tensor` or `IndexedSlices` that will be returned.
    name: (Optional) A name for this operation.

  Returns:
    Same as `output_tensor`.

  Raises:
    TypeError: if `output_tensor` is not a `Tensor` or `IndexedSlices`.
  """
  with ops.op_scope(dependencies + [output_tensor], name,
                   "control_dependency") as name:
    with ops.device(output_tensor.device
                    or ops.get_default_graph().get_default_device()):
      with ops.control_dependencies(dependencies):
        output_tensor = ops.convert_to_tensor_or_indexed_slices(output_tensor)
        if isinstance(output_tensor, ops.Tensor):
          return _Identity(output_tensor, name=name)
        else:
          return ops.IndexedSlices(_Identity(output_tensor.values, name=name),
                                   output_tensor.indices,
                                   output_tensor.dense_shape)
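
The same guarantee can be expressed with the public API; a sketch (TF 1.x graph mode) of the pattern `with_dependencies` implements:

import tensorflow as tf

counter = tf.Variable(0)
increment = tf.assign_add(counter, 1)        # a side-effecting dependency
value = tf.constant(42)

# Roughly equivalent to with_dependencies([increment], value): the returned
# identity only becomes available after `increment` has run.
with tf.control_dependencies([increment]):
  guarded_value = tf.identity(value)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  print(sess.run(guarded_value))   # 42
  print(sess.run(counter))         # 1 -- the dependency ran first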
Example #4
def _AsTensorList(x, p):
  """Return x as a list of Tensors or IndexedSlices.

  For entries of `x` that are Operations, this returns an Identity of `p`
  with a dependency on the operation.

  Args:
    x: A Tensor/IndexedSlices/Operation or a list or tuple of them.
    p: A Tensor to return for entries in `x` that are Operations.

  Returns:
    A list of Tensors or IndexedSlices.
  """
  if not isinstance(x, list) and not isinstance(x, _basetuple):
    x = [x]

  l = []
  for v in x:
    if isinstance(v, ops.Operation):
      v = with_dependencies([v], p)
    v = ops.convert_to_tensor_or_indexed_slices(v)
    if isinstance(v, ops.Tensor):
      l.append(array_ops.identity(v))
    else:
      l.append(ops.IndexedSlices(array_ops.identity(v.values),
                                 array_ops.identity(v.indices)))
  return l
Example #5
def switch(data, pred, name=None):
  """Forwards `data` to an output determined by `pred`.

  If `pred` is true, the `data` input is forwarded to the first output.
  Otherwise, the data goes to the second output.

  This op handles `Tensor`s and `IndexedSlices`.

  Args:
    data: The tensor to be forwarded to the appropriate output.
    pred: A scalar that specifies which output port will receive data.
    name: A name for this operation (optional).

  Returns:
    `(output_false, output_true)`: If `pred` is true, data will be forwarded to
    `output_true`, otherwise it goes to `output_false`.
  """
  with ops.op_scope([data, pred], name, "Switch") as name:
    data = ops.convert_to_tensor_or_indexed_slices(data, name="data")
    pred = ops.convert_to_tensor(pred, name="pred")
    if isinstance(data, ops.Tensor):
      return gen_control_flow_ops._switch(data, pred, name=name)
    else:
      val, ind, dense_shape = data.values, data.indices, data.dense_shape
      val_f, val_t = gen_control_flow_ops._switch(val, pred, name=name)
      ind_f, ind_t = gen_control_flow_ops._switch(ind, pred, name="indices")
      if dense_shape:
        dense_shape_f, dense_shape_t = gen_control_flow_ops._switch(
            dense_shape, pred, name="dense_shape")
      else:
        dense_shape_f, dense_shape_t = None, None
      return (ops.IndexedSlices(val_f, ind_f, dense_shape_f),
              ops.IndexedSlices(val_t, ind_t, dense_shape_t))
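
`IndexedSlices` usually arise as the gradient of a gather or embedding lookup, which is why `switch` and the optimizers below special-case them. A small sketch (TF 1.x graph mode):

import tensorflow as tf

params = tf.Variable(tf.ones([10, 4]))
rows = tf.gather(params, [1, 3])             # look up two rows
loss = tf.reduce_sum(rows)

# The gradient w.r.t. `params` only touches rows 1 and 3, so TensorFlow
# represents it sparsely as an IndexedSlices instead of a dense Tensor.
grad = tf.gradients(loss, [params])[0]
print(type(grad))                            # IndexedSlices
print(grad.values, grad.indices, grad.dense_shape)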
Example #6
def merge(inputs, name=None):
  """Returns the value of an available element of `inputs`.

  This op tests each of the tensors in `inputs` in turn to determine if any of
  them is available. If it finds an available tensor, it returns it and its
  index in `inputs`.

  It is an error if more than one tensor in `inputs` is available. If no tensor
  in `inputs` is available, the returned tensor and index are not set.

  This op handles both `Tensor`s and `IndexedSlices`. If inputs has a mix of
  `Tensor`s and `IndexedSlices`, all inputs are converted to IndexedSlices
  before merging.

  Args:
    inputs: The input tensors, at most one of which is available.
    name: A name for this operation (optional).

  Returns:
    A tuple containing the chosen input tensor and its index in `inputs`.

  Raises:
    ValueError: If inputs are IndexedSlices and some but not all have a
      dense_shape property.
  """
  with ops.op_scope(inputs, name, "Merge") as name:
    inputs = [ops.convert_to_tensor_or_indexed_slices(inp) for inp in inputs]
    if all([isinstance(inp, ops.Tensor) for inp in inputs]):
      return gen_control_flow_ops._merge(inputs, name=name)
    else:
      inputs = math_ops._as_indexed_slices_list(inputs)
      values, _ = gen_control_flow_ops._merge([inp.values for inp in inputs],
                                              name=name)
      indices, chosen_index = gen_control_flow_ops._merge(
          [inp.indices for inp in inputs], name="indices")
      if any(inp.dense_shape for inp in inputs):
        if not all(inp.dense_shape for inp in inputs):
          raise ValueError("Either all merged IndexedSlices must have a "
                           "dense_shape, or none must have a dense_shape.")
        dense_shape, _ = gen_control_flow_ops._merge(
            [inp.dense_shape for inp in inputs], name="dense_shape")
      else:
        dense_shape = None
      return ops.IndexedSlices(values, indices, dense_shape), chosen_index
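
`merge` is the counterpart of `switch`: once both branches have been built on the two `switch` outputs, `merge` returns whichever branch is alive. A sketch of that low-level pattern (TF 1.x graph mode, internal `control_flow_ops` module; roughly what `tf.cond` builds under the hood):

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

x = tf.constant(3.0)
pred = tf.placeholder(tf.bool, shape=[])

x_false, x_true = control_flow_ops.switch(x, pred)   # split into two branches
branch_true = x_true * 10.0                          # live only if pred is True
branch_false = x_false - 1.0                         # live only if pred is False
result, chosen_index = control_flow_ops.merge([branch_false, branch_true])

with tf.Session() as sess:
  print(sess.run(result, feed_dict={pred: True}))    # 30.0
  print(sess.run(result, feed_dict={pred: False}))   # 2.0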
Example #7
 def convert(x):
   """Converts a function output to a Tensor."""
   if x is None:
     return None
   if op_return_value is not None and isinstance(x, ops.Operation):
     # TODO(b/79881896): we currently can't capture external control deps, so
     # this won't work if x needs to be captured (i.e. if python_func returns
     # captured Operations).
     with ops.control_dependencies([x]):
       x = array_ops.identity(op_return_value)
   elif not isinstance(x, tensor_array_ops.TensorArray):
     try:
       x = ops.convert_to_tensor_or_indexed_slices(x)
     except (ValueError, TypeError):
       raise TypeError(
           "To be compatible with tf.contrib.eager.defun, Python functions "
           "must return zero or more Tensors; in compilation of %s, found "
           "return value of type %s, which is not a Tensor." %
           (str(python_func), type(x)))
   if add_control_dependencies:
     x = a.mark_as_return(x)
   return x
Example #8
 def convert(x):
     """Converts a function output to a Tensor."""
     if x is None:
         return None
     if op_return_value is not None and isinstance(x, ops.Operation):
         # TODO(b/79881896): we currently can't capture external control deps, so
         # this won't work if x needs to be captured (i.e. if python_func returns
         # captured Operations).
         with ops.control_dependencies([x]):
             x = array_ops.identity(op_return_value)
     elif not isinstance(x, tensor_array_ops.TensorArray):
         try:
             x = ops.convert_to_tensor_or_indexed_slices(x)
         except (ValueError, TypeError):
             raise TypeError(
                 "To be compatible with tf.contrib.eager.defun, Python functions "
                 "must return zero or more Tensors; in compilation of %s, found "
                 "return value of type %s, which is not a Tensor." %
                 (str(python_func), type(x)))
     if add_control_dependencies:
         x = a.mark_as_return(x)
     return x
Example #9
    def update(v, g):
      """Apply gradients to a replica variable."""
      assert v is not None

      try:
        # Convert the grad to Tensor or IndexedSlices if necessary.
        g = ops.convert_to_tensor_or_indexed_slices(g)
      except TypeError:
        raise TypeError("Gradient must be convertible to a Tensor"
                        " or IndexedSlices, or None: %s" % g)
      if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
        raise TypeError(
            "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
      p = _get_processor(v)

      scope_name = "" if context.executing_eagerly() else v.op.name
      # device_policy is set because non-mirrored tensors will be read in
      # `update_op`; `_resource_apply_dense` reading `lr_t`, `beta1_t` and
      # `beta2_t` is an example.
      with ops.name_scope(
          "update_" + scope_name), context.context().device_policy(
              context.DEVICE_PLACEMENT_SILENT):
        return p.update_op(self, g)
Example #10
    def input_layer_with_layer_annotations(features,
                                           feature_columns,
                                           weight_collections=None,
                                           trainable=True,
                                           cols_to_vars=None,
                                           scope=None,
                                           cols_to_output_tensors=None,
                                           from_template=False):
        """Returns a dense `Tensor` as input layer based on given `feature_columns`.

    Generally a single example in training data is described with
    FeatureColumns. At the first layer of the model, this column-oriented data
    should be converted to a single `Tensor`.

    This is like tf.feature_column.input_layer, except with added
    Integrated-Gradient annotations.

    Args:
      features: A mapping from key to tensors. `_FeatureColumn`s look up via
        these keys. For example `numeric_column('price')` will look at 'price'
        key in this dict. Values can be a `SparseTensor` or a `Tensor` depends
        on corresponding `_FeatureColumn`.
      feature_columns: An iterable containing the FeatureColumns to use as
        inputs to your model. All items should be instances of classes derived
        from `_DenseColumn` such as `numeric_column`, `embedding_column`,
        `bucketized_column`, `indicator_column`. If you have categorical
        features, you can wrap them with an `embedding_column` or
        `indicator_column`.
      weight_collections: A list of collection names to which the Variable will
        be added. Note that variables will also be added to collections
        `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`.
      trainable: If `True` also add the variable to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      cols_to_vars: If not `None`, must be a dictionary that will be filled with
        a mapping from `_FeatureColumn` to list of `Variable`s.  For example,
        after the call, we might have cols_to_vars = {_EmbeddingColumn(
        categorical_column=_HashedCategoricalColumn(key='sparse_feature',
        hash_bucket_size=5, dtype=tf.string), dimension=10): [<tf.Variable
        'some_variable:0' shape=(5, 10)>, <tf.Variable 'some_variable:1'
        shape=(5, 10)>]}. If a column creates no variables, its value will be an
        empty list.
      scope: A name or variable scope to use
      cols_to_output_tensors: If not `None`, must be a dictionary that will be
        filled with a mapping from '_FeatureColumn' to the associated output
        `Tensor`s.
      from_template: True if the method is being instantiated from a
        `make_template`.

    Returns:
      A `Tensor` which represents input layer of a model. Its shape
      is (batch_size, first_layer_dimension) and its dtype is `float32`.
      first_layer_dimension is determined based on given `feature_columns`.

    Raises:
      ValueError: features and feature_columns have different lengths.
    """

        local_cols_to_output_tensors = {}
        input_layer = original_input_layer(
            features=features,
            feature_columns=feature_columns,
            weight_collections=weight_collections,
            trainable=trainable,
            cols_to_vars=cols_to_vars,
            scope=scope,
            cols_to_output_tensors=local_cols_to_output_tensors,
            from_template=from_template)

        if cols_to_output_tensors is not None:
            cols_to_output_tensors = local_cols_to_output_tensors

        # Annotate features.
        # These are the parsed Tensors, before embedding.

        # Only annotate features used by FeatureColumns.
        # We figure which ones are used by FeatureColumns by creating a parsing
        # spec and looking at the keys.
        spec = feature_column_lib.make_parse_example_spec(feature_columns)
        for key in spec.keys():
            tensor = ops.convert_to_tensor_or_indexed_slices(features[key])
            ops.add_to_collection(
                LayerAnnotationsCollectionNames.keys(
                    LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), key)
            ops.add_to_collection(
                LayerAnnotationsCollectionNames.values(
                    LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES),
                _to_any_wrapped_tensor_info(tensor))

        # Annotate feature columns.
        for column in feature_columns:
            # TODO(cyfoo): Find a better way to serialize and deserialize
            # _FeatureColumn.
            ops.add_to_collection(
                LayerAnnotationsCollectionNames.FEATURE_COLUMNS,
                serialize_feature_column(column))

        for column, tensor in local_cols_to_output_tensors.items():
            ops.add_to_collection(
                LayerAnnotationsCollectionNames.keys(
                    LayerAnnotationsCollectionNames.PROCESSED_FEATURES),
                column.name)
            ops.add_to_collection(
                LayerAnnotationsCollectionNames.values(
                    LayerAnnotationsCollectionNames.PROCESSED_FEATURES),
                _to_any_wrapped_tensor_info(tensor))

        return input_layer
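
For context, the public API that `input_layer_with_layer_annotations` wraps looks roughly like this (a sketch assuming TF 1.x and `tf.feature_column`; the feature names are made up for illustration):

import tensorflow as tf

features = {
    "price": tf.constant([[1.0], [5.0]]),
    "color": tf.constant([["red"], ["blue"]]),
}

feature_columns = [
    tf.feature_column.numeric_column("price"),
    tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list(
            "color", ["red", "green", "blue"])),
]

# Produces a dense (batch_size, total_dimension) float32 Tensor by
# concatenating the dense representation of every column.
net = tf.feature_column.input_layer(features, feature_columns)

with tf.Session() as sess:
  sess.run(tf.tables_initializer())           # vocabulary lookup table
  sess.run(tf.global_variables_initializer())
  print(sess.run(net))                        # shape (2, 4)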
Example #11
 def convert(x):
   if x is None:
     return None
   x = ops.convert_to_tensor_or_indexed_slices(x)
   x = a.mark_as_return(x)
   return x
Example #12
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Apply gradients to variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        `compute_gradients()`.
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.

    Raises:
      TypeError: If `grads_and_vars` is malformed.
      ValueError: If none of the variables have gradients.
    """
        # This is a default implementation of apply_gradients() that can be shared
        # by most optimizers.  It relies on the subclass implementing the following
        # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().

        grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works
        converted_grads_and_vars = []
        for g, v in grads_and_vars:
            if g is not None:
                try:
                    # Convert the grad to Tensor or IndexedSlices if necessary
                    g = ops.convert_to_tensor_or_indexed_slices(g)
                except TypeError:
                    raise TypeError("Gradient must be convertible to a Tensor or IndexedSlices, or None: %s" % g)
            if not isinstance(g, (ops.Tensor, ops.IndexedSlices, type(None))):
                raise TypeError("Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
            if not isinstance(v, variables.Variable):
                raise TypeError("Variable must be a tf.Variable: %s" % v)

            converted_grads_and_vars.append((g, v))

        converted_grads_and_vars = tuple(converted_grads_and_vars)
        var_list = [v for g, v in converted_grads_and_vars if g is not None]
        if not var_list:
            raise ValueError("No gradients provided for any variable: %s" % (converted_grads_and_vars,))
        with ops.control_dependencies(None):
            self._create_slots(var_list)
        update_ops = []
        with ops.name_scope(name, self._name) as name:
            self._prepare()
            for grad, var in converted_grads_and_vars:
                if grad is None:
                    continue
                # We colocate all ops created in _apply_dense or _apply_sparse
                # on the same device as the variable.
                with ops.name_scope("update_" + var.op.name), ops.colocate_with(var):
                    if isinstance(grad, ops.Tensor):
                        update_ops.append(self._apply_dense(grad, var))
                    else:
                        update_ops.append(self._apply_sparse(grad, var))
            if global_step is None:
                return self._finish(update_ops, name)
            else:
                with ops.control_dependencies([self._finish(update_ops, "update")]):
                    with ops.colocate_with(global_step):
                        return state_ops.assign_add(global_step, 1, name=name).op
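
The usual call site for these `apply_gradients` implementations, sketched with the public TF 1.x optimizer API:

import tensorflow as tf

x = tf.Variable(3.0)
loss = tf.square(x)

global_step = tf.train.get_or_create_global_step()
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)

# compute_gradients() yields (gradient, variable) pairs; apply_gradients()
# is the method shown above and also bumps global_step when it is passed.
grads_and_vars = optimizer.compute_gradients(loss)
train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  sess.run(train_op)
  print(sess.run([x, global_step]))   # x moved towards 0, global_step == 1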
Example #13
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Apply gradients to variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        `compute_gradients()`.
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.

    Raises:
      TypeError: If `grads_and_vars` is malformed.
      ValueError: If none of the variables have gradients.
      RuntimeError: If you should use `_distributed_apply()` instead.
    """
    # This is a default implementation of apply_gradients() that can be shared
    # by most optimizers.  It relies on the subclass implementing the following
    # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().

    # Handle DistributionStrategy case.
    if distribute_ctx.get_cross_replica_context():
      raise RuntimeError("Use `_distributed_apply()` instead of "
                         "`apply_gradients()` in a cross-replica context.")
    # TODO(isaprykin): Get rid of `has_distribution_strategy()` check by
    # always calling _distributed_apply(), using the default distribution
    # as needed.
    if distribute_ctx.has_distribution_strategy():
      grads_and_vars = get_filtered_grad_fn(lambda: grads_and_vars)()
      return distribute_ctx.get_replica_context().merge_call(
          self._distributed_apply, args=(grads_and_vars, global_step, name))

    # No DistributionStrategy case.
    grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
    if not grads_and_vars:
      raise ValueError("No variables provided.")
    converted_grads_and_vars = []
    for g, v in grads_and_vars:
      if g is not None:
        try:
          # Convert the grad to Tensor or IndexedSlices if necessary.
          g = ops.convert_to_tensor_or_indexed_slices(g)
        except TypeError:
          raise TypeError(
              "Gradient must be convertible to a Tensor"
              " or IndexedSlices, or None: %s" % g)
        if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
          raise TypeError(
              "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
      p = _get_processor(v)
      converted_grads_and_vars.append((g, v, p))

    converted_grads_and_vars = tuple(converted_grads_and_vars)
    var_list = [v for g, v, _ in converted_grads_and_vars if g is not None]
    if not var_list:
      raise ValueError("No gradients provided for any variable: %s." %
                       ([str(v) for _, v, _ in converted_grads_and_vars],))
    with ops.init_scope():
      self._create_slots(var_list)
    update_ops = []
    with ops.name_scope(name, self._name) as name:
      self._prepare()
      for grad, var, processor in converted_grads_and_vars:
        if grad is None:
          continue
        # We colocate all ops created in _apply_dense or _apply_sparse
        # on the same device as the variable.
        # TODO(apassos): figure out how to get the variable name here.
        if context.executing_eagerly() or isinstance(
            var,
            resource_variable_ops.ResourceVariable) and not var._in_graph_mode:  # pylint: disable=protected-access
          scope_name = ""
        else:
          scope_name = var.op.name
        with ops.name_scope("update_" + scope_name), ops.colocate_with(var):
          update_ops.append(processor.update_op(self, grad))
      if global_step is None:
        apply_updates = self._finish(update_ops, name)
      else:
        with ops.control_dependencies([self._finish(update_ops, "update")]):
          with ops.colocate_with(global_step):
            if isinstance(global_step, resource_variable_ops.ResourceVariable):
              # TODO(apassos): the implicit read in assign_add is slow; consider
              # making it less so.
              apply_updates = resource_variable_ops.assign_add_variable_op(
                  global_step.handle,
                  ops.convert_to_tensor(1, dtype=global_step.dtype),
                  name=name)
            else:
              apply_updates = state_ops.assign_add(global_step, 1, name=name)

      if not context.executing_eagerly():
        if isinstance(apply_updates, ops.Tensor):
          apply_updates = apply_updates.op
        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
        if apply_updates not in train_op:
          train_op.append(apply_updates)

      return apply_updates
Example #14
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Apply gradients to variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        `compute_gradients()`.
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.

    Raises:
      TypeError: If `grads_and_vars` is malformed.
      ValueError: If none of the variables have gradients.
    """
    # This is a default implementation of apply_gradients() that can be shared
    # by most optimizers.  It relies on the subclass implementing the following
    # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().

    grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
    if not grads_and_vars:
      raise ValueError("No variables provided.")
    converted_grads_and_vars = []
    for g, v in grads_and_vars:
      if g is not None:
        try:
          # Convert the grad to Tensor or IndexedSlices if necessary.
          g = ops.convert_to_tensor_or_indexed_slices(g)
        except TypeError:
          raise TypeError(
              "Gradient must be convertible to a Tensor"
              " or IndexedSlices, or None: %s" % g)
        if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
          raise TypeError(
              "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
      p = _get_processor(v)
      converted_grads_and_vars.append((g, v, p))

    converted_grads_and_vars = tuple(converted_grads_and_vars)
    var_list = [v for g, v, _ in converted_grads_and_vars if g is not None]
    if not var_list:
      raise ValueError("No gradients provided for any variable: %s." %
                       ([str(v) for _, v, _ in converted_grads_and_vars],))
    with ops.control_dependencies(None):
      self._create_slots([_get_variable_for(v) for v in var_list])
    update_ops = []
    with ops.name_scope(name, self._name) as name:
      self._prepare()
      for grad, var, processor in converted_grads_and_vars:
        if grad is None:
          continue
        # We colocate all ops created in _apply_dense or _apply_sparse
        # on the same device as the variable.
        # TODO(apassos): figure out how to get the variable name here.
        scope_name = var.op.name if context.in_graph_mode() else ""
        with ops.name_scope("update_" + scope_name), ops.colocate_with(var):
          update_ops.append(processor.update_op(self, grad))
      if global_step is None:
        apply_updates = self._finish(update_ops, name)
      else:
        with ops.control_dependencies([self._finish(update_ops, "update")]):
          with ops.colocate_with(global_step):
            apply_updates = state_ops.assign_add(global_step, 1, name=name).op

      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
      if apply_updates not in train_op:
        train_op.append(apply_updates)

      return apply_updates
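
These default implementations only require a subclass to provide `_prepare()`, `_apply_dense()` and friends. A minimal sketch of a custom optimizer built on that contract (TF 1.x; slot variables and a proper sparse update are omitted, and `SimpleSGD` is a made-up name):

import tensorflow as tf

class SimpleSGD(tf.train.Optimizer):
  """Plain gradient descent expressed through the Optimizer hooks."""

  def __init__(self, learning_rate, use_locking=False, name="SimpleSGD"):
    super(SimpleSGD, self).__init__(use_locking, name)
    self._learning_rate = learning_rate

  def _prepare(self):
    # Called once by apply_gradients() before the per-variable updates.
    self._lr_t = tf.convert_to_tensor(self._learning_rate, name="learning_rate")

  def _apply_dense(self, grad, var):
    # var <- var - lr * grad; apply_gradients() colocates this op with `var`.
    lr = tf.cast(self._lr_t, var.dtype.base_dtype)
    return tf.assign_sub(var, lr * grad)

  def _apply_sparse(self, grad, var):
    # Densify IndexedSlices gradients; fine for a sketch, wasteful in practice.
    return self._apply_dense(tf.convert_to_tensor(grad), var)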
Example #15
 def convert(x):
     if x is None:
         return None
     x = ops.convert_to_tensor_or_indexed_slices(x)
     x = a.mark_as_return(x)
     return x
Example #16
  def input_layer_with_layer_annotations(features,
                                         feature_columns,
                                         weight_collections=None,
                                         trainable=True,
                                         cols_to_vars=None,
                                         scope=None,
                                         cols_to_output_tensors=None,
                                         from_template=False):
    """Returns a dense `Tensor` as input layer based on given `feature_columns`.

    Generally a single example in training data is described with
    FeatureColumns. At the first layer of the model, this column-oriented data
    should be converted to a single `Tensor`.

    This is like tf.feature_column.input_layer, except with added
    Integrated-Gradient annotations.

    Args:
      features: A mapping from key to tensors. `_FeatureColumn`s look up via
        these keys. For example `numeric_column('price')` will look at 'price'
        key in this dict. Values can be a `SparseTensor` or a `Tensor` depends
        on corresponding `_FeatureColumn`.
      feature_columns: An iterable containing the FeatureColumns to use as
        inputs to your model. All items should be instances of classes derived
        from `_DenseColumn` such as `numeric_column`, `embedding_column`,
        `bucketized_column`, `indicator_column`. If you have categorical
        features, you can wrap them with an `embedding_column` or
        `indicator_column`.
      weight_collections: A list of collection names to which the Variable will
        be added. Note that variables will also be added to collections
        `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`.
      trainable: If `True` also add the variable to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      cols_to_vars: If not `None`, must be a dictionary that will be filled with
        a mapping from `_FeatureColumn` to list of `Variable`s.  For example,
        after the call, we might have cols_to_vars = {_EmbeddingColumn(
        categorical_column=_HashedCategoricalColumn(key='sparse_feature',
        hash_bucket_size=5, dtype=tf.string), dimension=10): [<tf.Variable
        'some_variable:0' shape=(5, 10)>, <tf.Variable 'some_variable:1'
        shape=(5, 10)>]}. If a column creates no variables, its value will be an
        empty list.
      scope: A name or variable scope to use
      cols_to_output_tensors: If not `None`, must be a dictionary that will be
        filled with a mapping from '_FeatureColumn' to the associated output
        `Tensor`s.
      from_template: True if the method is being instantiated from a
        `make_template`.

    Returns:
      A `Tensor` which represents input layer of a model. Its shape
      is (batch_size, first_layer_dimension) and its dtype is `float32`.
      first_layer_dimension is determined based on given `feature_columns`.

    Raises:
      ValueError: features and feature_columns have different lengths.
    """

    local_cols_to_output_tensors = {}
    input_layer = original_input_layer(
        features=features,
        feature_columns=feature_columns,
        weight_collections=weight_collections,
        trainable=trainable,
        cols_to_vars=cols_to_vars,
        scope=scope,
        cols_to_output_tensors=local_cols_to_output_tensors,
        from_template=from_template)

    if cols_to_output_tensors is not None:
      cols_to_output_tensors = local_cols_to_output_tensors

    # Annotate features.
    # These are the parsed Tensors, before embedding.

    # Only annotate features used by FeatureColumns.
    # We figure which ones are used by FeatureColumns by creating a parsing
    # spec and looking at the keys.
    spec = feature_column_lib.make_parse_example_spec(feature_columns)
    for key in spec.keys():
      tensor = ops.convert_to_tensor_or_indexed_slices(features[key])
      ops.add_to_collection(
          LayerAnnotationsCollectionNames.keys(
              LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), key)
      ops.add_to_collection(
          LayerAnnotationsCollectionNames.values(
              LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES),
          _to_any_wrapped_tensor_info(tensor))

    # Annotate feature columns.
    for column in feature_columns:
      # TODO(cyfoo): Find a better way to serialize and deserialize
      # _FeatureColumn.
      ops.add_to_collection(LayerAnnotationsCollectionNames.FEATURE_COLUMNS,
                            serialize_feature_column(column))

    for column, tensor in local_cols_to_output_tensors.items():
      ops.add_to_collection(
          LayerAnnotationsCollectionNames.keys(
              LayerAnnotationsCollectionNames.PROCESSED_FEATURES), column.name)
      ops.add_to_collection(
          LayerAnnotationsCollectionNames.values(
              LayerAnnotationsCollectionNames.PROCESSED_FEATURES),
          _to_any_wrapped_tensor_info(tensor))

    return input_layer
Example #17
File: trpo.py  Project: qq456cvb/TRPO
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):

        # No DistributionStrategy case.
        grads_and_vars = tuple(
            grads_and_vars)  # Make sure repeat iteration works.
        if not grads_and_vars:
            raise ValueError("No variables provided.")
        converted_grads_and_vars = []
        for g, v in grads_and_vars:
            if g is not None:
                try:
                    # Convert the grad to Tensor or IndexedSlices if necessary.
                    g = ops.convert_to_tensor_or_indexed_slices(g)
                except TypeError:
                    raise TypeError("Gradient must be convertible to a Tensor"
                                    " or IndexedSlices, or None: %s" % g)
                if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
                    raise TypeError(
                        "Gradient must be a Tensor, IndexedSlices, or None: %s"
                        % g)
            # p = _get_processor(v)
            converted_grads_and_vars.append((g, v))

        # converted_grads_and_vars = tuple(converted_grads_and_vars)
        converted_grads_and_vars = tuple([(g, v)
                                          for g, v in converted_grads_and_vars
                                          if g is not None])
        var_list = [v for g, v in converted_grads_and_vars]
        cache_var_list = []
        for v in var_list:
            for c in self.cache_vars:
                if c.op.name == v.op.name + 'cache':
                    cache_var_list.append(c)
                    break
        assert len(var_list) == len(cache_var_list)
        if not var_list:
            raise ValueError("No gradients provided for any variable: %s." %
                             ([v.name for _, v in converted_grads_and_vars], ))
        with ops.init_scope():
            self._create_slots(var_list)
        var_shapes = [v.shape for _, v in converted_grads_and_vars]
        slice_idx = np.concatenate(
            [[0], np.cumsum([np.prod(vs) for vs in var_shapes])], 0)
        # print(var_shapes)
        # print(slice_idx)
        with ops.name_scope(name, self._name) as name:
            self._prepare()
            grad_flatten = tf.concat([
                tf.reshape(grad, [-1]) for grad, _ in converted_grads_and_vars
            ], 0)
            KL_grad = tf.gradients(self._mean_KL, var_list)
            KL_grad_flatten = tf.concat([tf.reshape(g, [-1]) for g in KL_grad],
                                        0)

            # calculate Hessian * x
            def Hx_fn(m):
                grads = tf.gradients(
                    tf.reduce_sum(KL_grad_flatten * tf.stop_gradient(m)),
                    var_list)
                return tf.concat([tf.reshape(g, [-1])
                                  for g in grads], 0) + 1e-5

            x = self.cg(Hx_fn, grad_flatten)
            xHx = tf.reduce_sum(tf.transpose(x) * Hx_fn(x))
            beta = tf.sqrt(2 * self._delta_t / (xHx + 1e-8))

            def get_KL(policy):
                return tf.reduce_mean(
                    tf.reduce_sum(
                        tf.stop_gradient(self._policy) * tf.log(
                            tf.stop_gradient(self._policy) /
                            (policy + 1e-8) + 1e-8), 1))

            i = tf.constant(0)

            def c(i, beta):
                with tf.control_dependencies([
                        control_flow_ops.group([
                            state_ops.assign(
                                var, cache_var_list[i] - beta *
                                tf.reshape(x[slice_idx[i]:slice_idx[i + 1]],
                                           var_shapes[i]))
                            for i, (_, var) in enumerate(grads_and_vars)
                        ])
                ]):
                    kl = get_KL(self.policy_fn())
                    cost = self.cost_fn()
                    return tf.logical_and(
                        i < self._ls_max_iter_t,
                        tf.logical_or(kl > self._delta_t,
                                      cost > self._cost_before))

            b = lambda i, beta: [i + 1, self._back_trace_ratio_t * beta]
            i, _ = tf.while_loop(c, b, loop_vars=[i, beta], back_prop=False)

            var_update = tf.cond(
                tf.logical_or(tf.equal(i, self._ls_max_iter_t),
                              tf.logical_not(tf.reduce_any(tf.is_nan(x)))),
                lambda: self.cache2var, lambda: self.var2cache)

            if not context.executing_eagerly():
                if isinstance(var_update, ops.Tensor):
                    var_update = var_update.op
                train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
                if var_update not in train_op:
                    train_op.append(var_update)
            return var_update
Example #18
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Apply gradients to variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        `compute_gradients()`.
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.

    Raises:
      TypeError: If `grads_and_vars` is malformed.
      ValueError: If none of the variables have gradients.
      RuntimeError: If you should use `_distributed_apply()` instead.
    """
    # This is a default implementation of apply_gradients() that can be shared
    # by most optimizers.  It relies on the subclass implementing the following
    # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().

    # TODO(isaprykin): Get rid of `has_strategy()` check by
    # always calling _distributed_apply(), using the default distribution
    # as needed.
    if distribute_ctx.has_strategy():
      # Handle DistributionStrategy case.
      if distribute_ctx.in_cross_replica_context():
        raise RuntimeError("Use `_distributed_apply()` instead of "
                           "`apply_gradients()` in a cross-replica context.")

      grads_and_vars = get_filtered_grad_fn(lambda: grads_and_vars)()
      return distribute_ctx.get_replica_context().merge_call(
          self._distributed_apply, args=(grads_and_vars, global_step, name))

    # No DistributionStrategy case.
    grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
    if not grads_and_vars:
      raise ValueError("No variables provided.")
    converted_grads_and_vars = []
    for g, v in grads_and_vars:
      if g is not None:
        try:
          # Convert the grad to Tensor or IndexedSlices if necessary.
          g = ops.convert_to_tensor_or_indexed_slices(g)
        except TypeError:
          raise TypeError(
              "Gradient must be convertible to a Tensor"
              " or IndexedSlices, or None: %s" % g)
        if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
          raise TypeError(
              "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
      p = _get_processor(v)
      converted_grads_and_vars.append((g, v, p))

    converted_grads_and_vars = tuple(converted_grads_and_vars)
    var_list = [v for g, v, _ in converted_grads_and_vars if g is not None]
    if not var_list:
      raise ValueError("No gradients provided for any variable: %s." %
                       ([str(v) for _, v, _ in converted_grads_and_vars],))
    with ops.init_scope():
      self._create_slots(var_list)
    update_ops = []
    with ops.name_scope(name, self._name) as name:
      self._prepare()
      for grad, var, processor in converted_grads_and_vars:
        if grad is None:
          continue
        # We colocate all ops created in _apply_dense or _apply_sparse
        # on the same device as the variable.
        # TODO(apassos): figure out how to get the variable name here.
        if context.executing_eagerly() or isinstance(
            var,
            resource_variable_ops.ResourceVariable) and not var._in_graph_mode:  # pylint: disable=protected-access
          scope_name = ""
        else:
          scope_name = var.op.name
        with ops.name_scope("update_" + scope_name), ops.colocate_with(var):
          update_ops.append(processor.update_op(self, grad))
      if global_step is None:
        apply_updates = self._finish(update_ops, name)
      else:
        with ops.control_dependencies([self._finish(update_ops, "update")]):
          with ops.colocate_with(global_step):
            if isinstance(global_step, resource_variable_ops.ResourceVariable):
              # TODO(apassos): the implicit read in assign_add is slow; consider
              # making it less so.
              apply_updates = resource_variable_ops.assign_add_variable_op(
                  global_step.handle,
                  ops.convert_to_tensor(1, dtype=global_step.dtype),
                  name=name)
            else:
              apply_updates = state_ops.assign_add(global_step, 1, name=name)

      if not context.executing_eagerly():
        if isinstance(apply_updates, ops.Tensor):
          apply_updates = apply_updates.op
        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
        if apply_updates not in train_op:
          train_op.append(apply_updates)

      return apply_updates
Example #19
 def convert(x):
   if x is None:
     return None
   return ops.convert_to_tensor_or_indexed_slices(x)
Example #20
    def apply_gradients(self, grads_and_vars, error, global_step=None, name=None):
        """
        Updates the weights of the network using EKF using grads_and_vars, the linearized measurment
        Matrices H and the variables, as well as the error = y_target-y_pred.

        Args:
          grads_and_vars: List of (H, variable) pairs as returned by
            `compute_gradient_Hs()`.
            error: the tensor y_target - y_pred
          global_step: Optional `Variable` to increment by one after the
            variables have been updated.
          name: Optional name for the returned operation.  Default to the
            name passed to the `Optimizer` constructor.

        Returns:
          An `Operation` that applies the specified gradients. If `global_step`
          was not None, that operation also increments `global_step`.

        Raises:
          TypeError: If `grads_and_vars` is malformed.
          ValueError: If none of the variables have gradients.
        """
        # This is a default implementation of apply_gradients() that can be shared
        # by most optimizers.  It relies on the subclass implementing the following
        # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().

        grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
        if not grads_and_vars:
            raise ValueError("No variables provided.")
        converted_grads_and_vars = []
        for g, v in grads_and_vars:
            if g is not None:
                try:
                    # Convert the grad to Tensor or IndexedSlices if necessary.
                    g = ops.convert_to_tensor_or_indexed_slices(g)
                except TypeError:
                    raise TypeError(
                        "Gradient must be convertible to a Tensor"
                        " or IndexedSlices, or None: %s" % g)
                if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
                    raise TypeError(
                        "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
            converted_grads_and_vars.append((g, v))

        converted_grads_and_vars = tuple(converted_grads_and_vars)
        var_list = [v for g, v in converted_grads_and_vars if g is not None]
        if not var_list:
            raise ValueError("No gradients provided for any variable: %s." %
                             ([str(v) for  _, v in converted_grads_and_vars],))
        with ops.control_dependencies(None):
            self._create_slots(var_list)
        update_ops = []
        with ops.name_scope(name, self._name) as name:
            self._prepare()
            for grad, var in converted_grads_and_vars:
                if grad is None:
                    continue
                # We colocate all ops created in _apply_dense or _apply_sparse
                # on the same device as the variable.
                with ops.name_scope("update_" + var.op.name), ops.colocate_with(var):
                    update_ops.append(self._apply_dense(grad, var, error))
            if global_step is None:
                apply_updates = self._finish(update_ops, name)
            else:
                with ops.control_dependencies([self._finish(update_ops, "update")]):
                    with ops.colocate_with(global_step):
                        apply_updates = state_ops.assign_add(global_step, 1, name=name).op

            train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
            if apply_updates not in train_op:
                train_op.append(apply_updates)

            return apply_updates
Example #21
 def convert(x):
     if x is None:
         return None
     return ops.convert_to_tensor_or_indexed_slices(x)
Example #22
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Apply gradients to variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        `compute_gradients()`.
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.

    Raises:
      TypeError: If `grads_and_vars` is malformed.
      ValueError: If none of the variables have gradients.
    """
        # This is a default implementation of apply_gradients() that can be shared
        # by most optimizers.  It relies on the subclass implementing the following
        # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().

        grads_and_vars = tuple(
            grads_and_vars)  # Make sure repeat iteration works.
        if not grads_and_vars:
            raise ValueError("No variables provided.")
        converted_grads_and_vars = []
        for g, v in grads_and_vars:
            if g is not None:
                try:
                    # Convert the grad to Tensor or IndexedSlices if necessary.
                    g = ops.convert_to_tensor_or_indexed_slices(g)
                except TypeError:
                    raise TypeError("Gradient must be convertible to a Tensor"
                                    " or IndexedSlices, or None: %s" % g)
                if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
                    raise TypeError(
                        "Gradient must be a Tensor, IndexedSlices, or None: %s"
                        % g)
            p = _get_processor(v)
            converted_grads_and_vars.append((g, v, p))

        converted_grads_and_vars = tuple(converted_grads_and_vars)
        var_list = [v for g, v, _ in converted_grads_and_vars if g is not None]
        if not var_list:
            raise ValueError("No gradients provided for any variable: %s." %
                             ([str(v)
                               for _, v, _ in converted_grads_and_vars], ))
        with ops.control_dependencies(None):
            self._create_slots([_get_variable_for(v) for v in var_list])
        update_ops = []
        with ops.name_scope(name, self._name) as name:
            self._prepare()
            for grad, var, processor in converted_grads_and_vars:
                if grad is None:
                    continue
                # We colocate all ops created in _apply_dense or _apply_sparse
                # on the same device as the variable.
                # TODO (apassos): figure out how to get the variable name here. id:3354 gh:3355
                scope_name = var.op.name if context.in_graph_mode() else ""
                with ops.name_scope("update_" +
                                    scope_name), ops.colocate_with(var):
                    update_ops.append(processor.update_op(self, grad))
            if global_step is None:
                apply_updates = self._finish(update_ops, name)
            else:
                with ops.control_dependencies(
                    [self._finish(update_ops, "update")]):
                    with ops.colocate_with(global_step):
                        apply_updates = state_ops.assign_add(global_step,
                                                             1,
                                                             name=name)

            if context.in_graph_mode():
                if isinstance(apply_updates, ops.Tensor):
                    apply_updates = apply_updates.op
                train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
                if apply_updates not in train_op:
                    train_op.append(apply_updates)

            return apply_updates
Example #23
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Apply gradients to variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        `compute_gradients()`.
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.

    Raises:
      TypeError: If `grads_and_vars` is malformed.
      ValueError: If none of the variables have gradients.
    """
        # This is a default implementation of apply_gradients() that can be shared
        # by most optimizers.  It relies on the subclass implementing the following
        # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().

        grads_and_vars = tuple(
            grads_and_vars)  # Make sure repeat iteration works
        converted_grads_and_vars = []
        for g, v in grads_and_vars:
            if g is not None:
                try:
                    # Convert the grad to Tensor or IndexedSlices if necessary
                    g = ops.convert_to_tensor_or_indexed_slices(g)
                except TypeError:
                    raise TypeError(
                        "Gradient must be convertible to a Tensor or IndexedSlices, or None: %s"
                        % g)
            if not isinstance(g, (ops.Tensor, ops.IndexedSlices, type(None))):
                raise TypeError(
                    "Gradient must be a Tensor, IndexedSlices, or None: %s" %
                    g)
            if not isinstance(v, variables.Variable):
                raise TypeError("Variable must be a tf.Variable: %s" % v)

            converted_grads_and_vars.append((g, v))

        converted_grads_and_vars = tuple(converted_grads_and_vars)
        var_list = [v for g, v in converted_grads_and_vars if g is not None]
        if not var_list:
            raise ValueError("No gradients provided for any variable: %s" %
                             (converted_grads_and_vars, ))
        with ops.control_dependencies(None):
            self._create_slots(var_list)
        update_ops = []
        with ops.name_scope(name, self._name) as name:
            self._prepare()
            for grad, var in converted_grads_and_vars:
                if grad is None:
                    continue
                # We colocate all ops created in _apply_dense or _apply_sparse
                # on the same device as the variable.
                with ops.name_scope("update_" +
                                    var.op.name), ops.colocate_with(var):
                    if isinstance(grad, ops.Tensor):
                        update_ops.append(self._apply_dense(grad, var))
                    else:
                        update_ops.append(self._apply_sparse(grad, var))
            if global_step is None:
                return self._finish(update_ops, name)
            else:
                with ops.control_dependencies(
                    [self._finish(update_ops, "update")]):
                    with ops.colocate_with(global_step):
                        return state_ops.assign_add(global_step, 1,
                                                    name=name).op