def _SwitchRefOrTensor(data, pred, name="Switch"):
  """Forwards `data` to an output determined by `pred`.

  If `pred` is true, the `data` input is forwarded to `output_true`.
  Otherwise, the data goes to `output_false`.

  This op handles `Tensor`s and `IndexedSlices`.

  Args:
    data: The tensor to be forwarded to the appropriate output.
    pred: A scalar that specifies which output port will receive data.
    name: A name for this operation (optional).

  Returns:
    `(output_false, output_true)`: If `pred` is true, data will be forwarded
    to `output_true`, otherwise it goes to `output_false`.

  Raises:
    TypeError: if data is not a Tensor or IndexedSlices.
  """
  data = ops.convert_to_tensor_or_indexed_slices(data, name="data")
  if isinstance(data, ops.Tensor):
    if not data.dtype.is_ref_dtype:
      return switch(data, pred, name=name)
    else:
      return ref_switch(data, pred, name=name)
  else:
    return switch(data, pred, name=name)
def update(v, g):
  """Apply gradients to a replica variable."""
  assert v is not None

  try:
    # Convert the grad to Tensor or IndexedSlices if necessary.
    g = ops.convert_to_tensor_or_indexed_slices(g)
  except TypeError:
    raise TypeError("Gradient must be convertible to a Tensor"
                    " or IndexedSlices, or None: %s" % g)
  if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
    raise TypeError(
        "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
  p = _get_processor(v)

  if context.executing_eagerly() or (
      resource_variable_ops.is_resource_variable(v) and
      not v._in_graph_mode):  # pylint: disable=protected-access
    scope_name = v.name.split(":")[0]
  else:
    scope_name = v.op.name

  # device_policy is set because non-mirrored tensors will be read in
  # `update_op`. `_resource_apply_dense`, `lr_t`, `beta1_t` and `beta2_t`
  # are examples.
  with ops.name_scope("update_" + scope_name):
    return p.update_op(self, g)
def with_dependencies(dependencies, output_tensor, name=None):
  """Produces the content of `output_tensor` only after `dependencies`.

  In some cases, a user may want the output of an operation to be consumed
  externally only after some other dependencies have run first. This function
  returns `output_tensor`, but only after all operations in `dependencies`
  have run. Note that only the value returned by this function carries the
  control dependency; the original `output_tensor` itself is not guaranteed
  to be evaluated after `dependencies` have run.

  See also `tuple` and `group`.

  Args:
    dependencies: A list of operations to run before this op finishes.
    output_tensor: A `Tensor` or `IndexedSlices` that will be returned.
    name: (Optional) A name for this operation.

  Returns:
    Same as `output_tensor`.

  Raises:
    TypeError: if `output_tensor` is not a `Tensor` or `IndexedSlices`.
  """
  with ops.op_scope(dependencies + [output_tensor], name,
                    "control_dependency") as name:
    with ops.device(output_tensor.device
                    or ops.get_default_graph().get_default_device()):
      with ops.control_dependencies(dependencies):
        output_tensor = ops.convert_to_tensor_or_indexed_slices(output_tensor)
        if isinstance(output_tensor, ops.Tensor):
          return _Identity(output_tensor, name=name)
        else:
          return ops.IndexedSlices(
              _Identity(output_tensor.values, name=name),
              output_tensor.indices,
              output_tensor.dense_shape)
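# A minimal usage sketch for `with_dependencies` above (assumes TF 1.x graph
# mode; the function is also reachable as
# `tensorflow.python.ops.control_flow_ops.with_dependencies`). The returned
# tensor carries a control dependency on the assign op, so evaluating it
# forces the counter update to run first. The names `counter` and `value`
# are illustrative only.
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

counter = tf.Variable(0, name="counter")
value = tf.constant(3.0, name="value")
increment = tf.assign_add(counter, 1)

# `guarded_value` has the same content as `value`, but evaluating it also
# runs `increment` first.
guarded_value = control_flow_ops.with_dependencies([increment], value)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  print(sess.run(guarded_value))  # 3.0; counter has now been incremented to 1.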
def _AsTensorList(x, p):
  """Return x as a list of Tensors or IndexedSlices.

  For entries of `x` that are Operations, this returns an Identity of `p`
  with a dependency on the operation.

  Args:
    x: A Tensor/IndexedSlices/Operation or a list or tuple of them.
    p: A Tensor to return for entries in `x` that are Operations.

  Returns:
    A list of Tensors or IndexedSlices.
  """
  if not isinstance(x, list) and not isinstance(x, _basetuple):
    x = [x]

  l = []
  for v in x:
    if isinstance(v, ops.Operation):
      v = with_dependencies([v], p)
    v = ops.convert_to_tensor_or_indexed_slices(v)
    if isinstance(v, ops.Tensor):
      l.append(array_ops.identity(v))
    else:
      l.append(ops.IndexedSlices(array_ops.identity(v.values),
                                 array_ops.identity(v.indices)))
  return l
def switch(data, pred, name=None):
  """Forwards `data` to an output determined by `pred`.

  If `pred` is true, the `data` input is forwarded to `output_true`.
  Otherwise, the data goes to `output_false`.

  This op handles `Tensor`s and `IndexedSlices`.

  Args:
    data: The tensor to be forwarded to the appropriate output.
    pred: A scalar that specifies which output port will receive data.
    name: A name for this operation (optional).

  Returns:
    `(output_false, output_true)`: If `pred` is true, data will be forwarded
    to `output_true`, otherwise it goes to `output_false`.
  """
  with ops.op_scope([data, pred], name, "Switch") as name:
    data = ops.convert_to_tensor_or_indexed_slices(data, name="data")
    pred = ops.convert_to_tensor(pred, name="pred")
    if isinstance(data, ops.Tensor):
      return gen_control_flow_ops._switch(data, pred, name=name)
    else:
      val, ind, dense_shape = data.values, data.indices, data.dense_shape
      val_f, val_t = gen_control_flow_ops._switch(val, pred, name=name)
      ind_f, ind_t = gen_control_flow_ops._switch(ind, pred, name="indices")
      if dense_shape:
        dense_shape_f, dense_shape_t = gen_control_flow_ops._switch(
            dense_shape, pred, name="dense_shape")
      else:
        dense_shape_f, dense_shape_t = None, None
      return (ops.IndexedSlices(val_f, ind_f, dense_shape_f),
              ops.IndexedSlices(val_t, ind_t, dense_shape_t))
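# A small sketch of the port ordering documented in `switch` above (assumes
# TF 1.x graph mode and uses the module-level `control_flow_ops.switch`
# wrapper). Only the live output is fetched in each run; the untaken output
# is a dead tensor and cannot be fetched.
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

x = tf.constant([1.0, 2.0])
pred = tf.placeholder(tf.bool, shape=[])

# output_false receives `x` when pred is False; output_true when pred is True.
output_false, output_true = control_flow_ops.switch(x, pred)

with tf.Session() as sess:
  print(sess.run(output_true, feed_dict={pred: True}))    # [1. 2.]
  print(sess.run(output_false, feed_dict={pred: False}))  # [1. 2.]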
def merge(inputs, name=None):
  """Returns the value of an available element of `inputs`.

  This op tests each of the tensors in `inputs` in turn to determine if any of
  them is available. If it finds an available tensor, it returns it and its
  index in `inputs`.

  It is an error if more than one tensor in `inputs` is available. If no
  tensor in `inputs` is available, the returned tensor and index are not set.

  This op handles both `Tensor`s and `IndexedSlices`. If `inputs` has a mix of
  `Tensor`s and `IndexedSlices`, all inputs are converted to IndexedSlices
  before merging.

  Args:
    inputs: The input tensors, at most one of which is available.
    name: A name for this operation (optional).

  Returns:
    A tuple containing the chosen input tensor and its index in `inputs`.

  Raises:
    ValueError: If inputs are IndexedSlices and some but not all have a
      dense_shape property.
  """
  with ops.op_scope(inputs, name, "Merge") as name:
    inputs = [ops.convert_to_tensor_or_indexed_slices(inp) for inp in inputs]
    if all([isinstance(inp, ops.Tensor) for inp in inputs]):
      return gen_control_flow_ops._merge(inputs, name=name)
    else:
      inputs = math_ops._as_indexed_slices_list(inputs)
      values, _ = gen_control_flow_ops._merge([inp.values for inp in inputs],
                                              name=name)
      indices, chosen_index = gen_control_flow_ops._merge(
          [inp.indices for inp in inputs], name="indices")
      if any(inp.dense_shape for inp in inputs):
        if not all(inp.dense_shape for inp in inputs):
          raise ValueError("Either all merged IndexedSlices must have a "
                           "dense_shape, or none must have a dense_shape.")
        dense_shape, _ = gen_control_flow_ops._merge(
            [inp.dense_shape for inp in inputs], name="dense_shape")
      else:
        dense_shape = None
      return ops.IndexedSlices(values, indices, dense_shape), chosen_index
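# A sketch of how `switch` and `merge` compose into a conditional; this is
# essentially the dataflow that tf.cond builds internally (assumes TF 1.x
# graph mode). Only the taken branch is computed at run time; `merge` forwards
# whichever branch output is available and reports its index.
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

x = tf.constant(2.0)
pred = tf.placeholder(tf.bool, shape=[])

x_false, x_true = control_flow_ops.switch(x, pred)
branch_true = x_true * 10.0   # only runs when pred is True
branch_false = x_false + 1.0  # only runs when pred is False

# `result` is the value of the live branch; `index` is its position in the
# input list (0 = false branch, 1 = true branch).
result, index = control_flow_ops.merge([branch_false, branch_true])

with tf.Session() as sess:
  print(sess.run([result, index], feed_dict={pred: True}))   # [20.0, 1]
  print(sess.run([result, index], feed_dict={pred: False}))  # [3.0, 0]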
def convert(x):
  """Converts a function output to a Tensor."""
  if x is None:
    return None
  if op_return_value is not None and isinstance(x, ops.Operation):
    # TODO(b/79881896): we currently can't capture external control deps, so
    # this won't work if x needs to be captured (i.e. if python_func returns
    # captured Operations).
    with ops.control_dependencies([x]):
      x = array_ops.identity(op_return_value)
  elif not isinstance(x, tensor_array_ops.TensorArray):
    try:
      x = ops.convert_to_tensor_or_indexed_slices(x)
    except (ValueError, TypeError):
      raise TypeError(
          "To be compatible with tf.contrib.eager.defun, Python functions "
          "must return zero or more Tensors; in compilation of %s, found "
          "return value of type %s, which is not a Tensor." %
          (str(python_func), type(x)))
  if add_control_dependencies:
    x = a.mark_as_return(x)
  return x
def update(v, g):
  """Apply gradients to a replica variable."""
  assert v is not None

  try:
    # Convert the grad to Tensor or IndexedSlices if necessary.
    g = ops.convert_to_tensor_or_indexed_slices(g)
  except TypeError:
    raise TypeError("Gradient must be convertible to a Tensor"
                    " or IndexedSlices, or None: %s" % g)
  if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
    raise TypeError(
        "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
  p = _get_processor(v)

  scope_name = "" if context.executing_eagerly() else v.op.name
  # device_policy is set because non-mirrored tensors will be read in
  # `update_op`. `_resource_apply_dense`, `lr_t`, `beta1_t` and `beta2_t`
  # are examples.
  with ops.name_scope(
      "update_" + scope_name), context.context().device_policy(
          context.DEVICE_PLACEMENT_SILENT):
    return p.update_op(self, g)
def input_layer_with_layer_annotations(features,
                                       feature_columns,
                                       weight_collections=None,
                                       trainable=True,
                                       cols_to_vars=None,
                                       scope=None,
                                       cols_to_output_tensors=None,
                                       from_template=False):
  """Returns a dense `Tensor` as input layer based on given `feature_columns`.

  Generally a single example in training data is described with
  FeatureColumns. At the first layer of the model, this column-oriented data
  should be converted to a single `Tensor`.

  This is like tf.feature_column.input_layer, except with added
  Integrated-Gradient annotations.

  Args:
    features: A mapping from key to tensors. `_FeatureColumn`s look up via
      these keys. For example `numeric_column('price')` will look at 'price'
      key in this dict. Values can be a `SparseTensor` or a `Tensor`,
      depending on the corresponding `_FeatureColumn`.
    feature_columns: An iterable containing the FeatureColumns to use as
      inputs to your model. All items should be instances of classes derived
      from `_DenseColumn` such as `numeric_column`, `embedding_column`,
      `bucketized_column`, `indicator_column`. If you have categorical
      features, you can wrap them with an `embedding_column` or
      `indicator_column`.
    weight_collections: A list of collection names to which the Variable will
      be added. Note that variables will also be added to collections
      `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`.
    trainable: If `True` also add the variable to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    cols_to_vars: If not `None`, must be a dictionary that will be filled with
      a mapping from `_FeatureColumn` to list of `Variable`s. For example,
      after the call, we might have
      cols_to_vars = {_EmbeddingColumn(
          categorical_column=_HashedCategoricalColumn(
              key='sparse_feature', hash_bucket_size=5, dtype=tf.string),
          dimension=10): [<tf.Variable 'some_variable:0' shape=(5, 10),
                          <tf.Variable 'some_variable:1' shape=(5, 10)]}
      If a column creates no variables, its value will be an empty list.
    scope: A name or variable scope to use.
    cols_to_output_tensors: If not `None`, must be a dictionary that will be
      filled with a mapping from '_FeatureColumn' to the associated output
      `Tensor`s.
    from_template: True if the method is being instantiated from a
      `make_template`.

  Returns:
    A `Tensor` which represents the input layer of a model. Its shape is
    (batch_size, first_layer_dimension) and its dtype is `float32`.
    first_layer_dimension is determined based on given `feature_columns`.

  Raises:
    ValueError: if `features` and `feature_columns` have different lengths.
  """
  local_cols_to_output_tensors = {}
  input_layer = original_input_layer(
      features=features,
      feature_columns=feature_columns,
      weight_collections=weight_collections,
      trainable=trainable,
      cols_to_vars=cols_to_vars,
      scope=scope,
      cols_to_output_tensors=local_cols_to_output_tensors,
      from_template=from_template)

  if cols_to_output_tensors is not None:
    cols_to_output_tensors = local_cols_to_output_tensors

  # Annotate features.
  # These are the parsed Tensors, before embedding.
  # Only annotate features used by FeatureColumns.
  # We figure which ones are used by FeatureColumns by creating a parsing
  # spec and looking at the keys.
  spec = feature_column_lib.make_parse_example_spec(feature_columns)
  for key in spec.keys():
    tensor = ops.convert_to_tensor_or_indexed_slices(features[key])
    ops.add_to_collection(
        LayerAnnotationsCollectionNames.keys(
            LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), key)
    ops.add_to_collection(
        LayerAnnotationsCollectionNames.values(
            LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES),
        _to_any_wrapped_tensor_info(tensor))

  # Annotate feature columns.
  for column in feature_columns:
    # TODO(cyfoo): Find a better way to serialize and deserialize
    # _FeatureColumn.
    ops.add_to_collection(LayerAnnotationsCollectionNames.FEATURE_COLUMNS,
                          serialize_feature_column(column))

  for column, tensor in local_cols_to_output_tensors.items():
    ops.add_to_collection(
        LayerAnnotationsCollectionNames.keys(
            LayerAnnotationsCollectionNames.PROCESSED_FEATURES), column.name)
    ops.add_to_collection(
        LayerAnnotationsCollectionNames.values(
            LayerAnnotationsCollectionNames.PROCESSED_FEATURES),
        _to_any_wrapped_tensor_info(tensor))

  return input_layer
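# A hedged usage sketch for `input_layer_with_layer_annotations` above. In its
# original source the function closes over `original_input_layer`, so assume
# it is already in scope here; the feature names and values below are
# illustrative only. It behaves like tf.feature_column.input_layer, but
# additionally records the pre- and post-embedding tensors in graph
# collections for Integrated-Gradients tooling.
import tensorflow as tf

price = tf.feature_column.numeric_column('price')
terms = tf.feature_column.categorical_column_with_hash_bucket(
    'terms', hash_bucket_size=100)
terms_emb = tf.feature_column.embedding_column(terms, dimension=8)

features = {
    'price': tf.constant([[1.0], [5.0]]),
    'terms': tf.SparseTensor(indices=[[0, 0], [1, 0]],
                             values=['cheap', 'luxury'],
                             dense_shape=[2, 1]),
}

# Same call signature as tf.feature_column.input_layer, plus annotations.
net = input_layer_with_layer_annotations(features, [price, terms_emb])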
def convert(x):
  if x is None:
    return None
  x = ops.convert_to_tensor_or_indexed_slices(x)
  x = a.mark_as_return(x)
  return x
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  """Apply gradients to variables.

  This is the second part of `minimize()`. It returns an `Operation` that
  applies gradients.

  Args:
    grads_and_vars: List of (gradient, variable) pairs as returned by
      `compute_gradients()`.
    global_step: Optional `Variable` to increment by one after the
      variables have been updated.
    name: Optional name for the returned operation. Defaults to the name
      passed to the `Optimizer` constructor.

  Returns:
    An `Operation` that applies the specified gradients. If `global_step`
    was not None, that operation also increments `global_step`.

  Raises:
    TypeError: If `grads_and_vars` is malformed.
    ValueError: If none of the variables have gradients.
  """
  # This is a default implementation of apply_gradients() that can be shared
  # by most optimizers. It relies on the subclass implementing the following
  # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().
  grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
  converted_grads_and_vars = []
  for g, v in grads_and_vars:
    if g is not None:
      try:
        # Convert the grad to Tensor or IndexedSlices if necessary.
        g = ops.convert_to_tensor_or_indexed_slices(g)
      except TypeError:
        raise TypeError(
            "Gradient must be convertible to a Tensor or IndexedSlices, "
            "or None: %s" % g)
      if not isinstance(g, (ops.Tensor, ops.IndexedSlices, type(None))):
        raise TypeError(
            "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
    if not isinstance(v, variables.Variable):
      raise TypeError("Variable must be a tf.Variable: %s" % v)
    converted_grads_and_vars.append((g, v))

  converted_grads_and_vars = tuple(converted_grads_and_vars)
  var_list = [v for g, v in converted_grads_and_vars if g is not None]
  if not var_list:
    raise ValueError("No gradients provided for any variable: %s" %
                     (converted_grads_and_vars,))
  with ops.control_dependencies(None):
    self._create_slots(var_list)
  update_ops = []
  with ops.name_scope(name, self._name) as name:
    self._prepare()
    for grad, var in converted_grads_and_vars:
      if grad is None:
        continue
      # We colocate all ops created in _apply_dense or _apply_sparse
      # on the same device as the variable.
      with ops.name_scope("update_" + var.op.name), ops.colocate_with(var):
        if isinstance(grad, ops.Tensor):
          update_ops.append(self._apply_dense(grad, var))
        else:
          update_ops.append(self._apply_sparse(grad, var))
    if global_step is None:
      return self._finish(update_ops, name)
    else:
      with ops.control_dependencies([self._finish(update_ops, "update")]):
        with ops.colocate_with(global_step):
          return state_ops.assign_add(global_step, 1, name=name).op
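# A typical call pattern for `apply_gradients` implementations like the one
# above (TF 1.x graph mode), using the explicit compute/apply split so
# gradients can be transformed in between. The tiny model below is
# illustrative only.
import tensorflow as tf

x = tf.Variable([1.0, 2.0], name="x")
loss = tf.reduce_sum(tf.square(x))

global_step = tf.train.get_or_create_global_step()
opt = tf.train.GradientDescentOptimizer(learning_rate=0.1)

grads_and_vars = opt.compute_gradients(loss)
# Example transformation: clip each gradient before applying it.
clipped = [(tf.clip_by_norm(g, 5.0) if g is not None else None, v)
           for g, v in grads_and_vars]
train_op = opt.apply_gradients(clipped, global_step=global_step)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  sess.run(train_op)  # applies one update and increments global_step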
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  """Apply gradients to variables.

  This is the second part of `minimize()`. It returns an `Operation` that
  applies gradients.

  Args:
    grads_and_vars: List of (gradient, variable) pairs as returned by
      `compute_gradients()`.
    global_step: Optional `Variable` to increment by one after the
      variables have been updated.
    name: Optional name for the returned operation. Defaults to the name
      passed to the `Optimizer` constructor.

  Returns:
    An `Operation` that applies the specified gradients. If `global_step`
    was not None, that operation also increments `global_step`.

  Raises:
    TypeError: If `grads_and_vars` is malformed.
    ValueError: If none of the variables have gradients.
    RuntimeError: If you should use `_distributed_apply()` instead.
  """
  # This is a default implementation of apply_gradients() that can be shared
  # by most optimizers. It relies on the subclass implementing the following
  # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().

  # Handle DistributionStrategy case.
  if distribute_ctx.get_cross_replica_context():
    raise RuntimeError("Use `_distributed_apply()` instead of "
                       "`apply_gradients()` in a cross-replica context.")
  # TODO(isaprykin): Get rid of `has_distribution_strategy()` check by
  # always calling _distributed_apply(), using the default distribution
  # as needed.
  if distribute_ctx.has_distribution_strategy():
    grads_and_vars = get_filtered_grad_fn(lambda: grads_and_vars)()
    return distribute_ctx.get_replica_context().merge_call(
        self._distributed_apply, args=(grads_and_vars, global_step, name))

  # No DistributionStrategy case.
  grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
  if not grads_and_vars:
    raise ValueError("No variables provided.")
  converted_grads_and_vars = []
  for g, v in grads_and_vars:
    if g is not None:
      try:
        # Convert the grad to Tensor or IndexedSlices if necessary.
        g = ops.convert_to_tensor_or_indexed_slices(g)
      except TypeError:
        raise TypeError("Gradient must be convertible to a Tensor"
                        " or IndexedSlices, or None: %s" % g)
      if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
        raise TypeError(
            "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
    p = _get_processor(v)
    converted_grads_and_vars.append((g, v, p))

  converted_grads_and_vars = tuple(converted_grads_and_vars)
  var_list = [v for g, v, _ in converted_grads_and_vars if g is not None]
  if not var_list:
    raise ValueError("No gradients provided for any variable: %s." %
                     ([str(v) for _, v, _ in converted_grads_and_vars],))
  with ops.init_scope():
    self._create_slots(var_list)
  update_ops = []
  with ops.name_scope(name, self._name) as name:
    self._prepare()
    for grad, var, processor in converted_grads_and_vars:
      if grad is None:
        continue
      # We colocate all ops created in _apply_dense or _apply_sparse
      # on the same device as the variable.
      # TODO(apassos): figure out how to get the variable name here.
      if context.executing_eagerly() or isinstance(
          var, resource_variable_ops.ResourceVariable
      ) and not var._in_graph_mode:  # pylint: disable=protected-access
        scope_name = ""
      else:
        scope_name = var.op.name
      with ops.name_scope("update_" + scope_name), ops.colocate_with(var):
        update_ops.append(processor.update_op(self, grad))
    if global_step is None:
      apply_updates = self._finish(update_ops, name)
    else:
      with ops.control_dependencies([self._finish(update_ops, "update")]):
        with ops.colocate_with(global_step):
          if isinstance(global_step, resource_variable_ops.ResourceVariable):
            # TODO(apassos): the implicit read in assign_add is slow; consider
            # making it less so.
            apply_updates = resource_variable_ops.assign_add_variable_op(
                global_step.handle,
                ops.convert_to_tensor(1, dtype=global_step.dtype),
                name=name)
          else:
            apply_updates = state_ops.assign_add(global_step, 1, name=name)

    if not context.executing_eagerly():
      if isinstance(apply_updates, ops.Tensor):
        apply_updates = apply_updates.op
      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
      if apply_updates not in train_op:
        train_op.append(apply_updates)

    return apply_updates
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  """Apply gradients to variables.

  This is the second part of `minimize()`. It returns an `Operation` that
  applies gradients.

  Args:
    grads_and_vars: List of (gradient, variable) pairs as returned by
      `compute_gradients()`.
    global_step: Optional `Variable` to increment by one after the
      variables have been updated.
    name: Optional name for the returned operation. Defaults to the name
      passed to the `Optimizer` constructor.

  Returns:
    An `Operation` that applies the specified gradients. If `global_step`
    was not None, that operation also increments `global_step`.

  Raises:
    TypeError: If `grads_and_vars` is malformed.
    ValueError: If none of the variables have gradients.
  """
  # This is a default implementation of apply_gradients() that can be shared
  # by most optimizers. It relies on the subclass implementing the following
  # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().
  grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
  if not grads_and_vars:
    raise ValueError("No variables provided.")
  converted_grads_and_vars = []
  for g, v in grads_and_vars:
    if g is not None:
      try:
        # Convert the grad to Tensor or IndexedSlices if necessary.
        g = ops.convert_to_tensor_or_indexed_slices(g)
      except TypeError:
        raise TypeError("Gradient must be convertible to a Tensor"
                        " or IndexedSlices, or None: %s" % g)
      if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
        raise TypeError(
            "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
    p = _get_processor(v)
    converted_grads_and_vars.append((g, v, p))

  converted_grads_and_vars = tuple(converted_grads_and_vars)
  var_list = [v for g, v, _ in converted_grads_and_vars if g is not None]
  if not var_list:
    raise ValueError("No gradients provided for any variable: %s." %
                     ([str(v) for _, _, v in converted_grads_and_vars],))
  with ops.control_dependencies(None):
    self._create_slots([_get_variable_for(v) for v in var_list])
  update_ops = []
  with ops.name_scope(name, self._name) as name:
    self._prepare()
    for grad, var, processor in converted_grads_and_vars:
      if grad is None:
        continue
      # We colocate all ops created in _apply_dense or _apply_sparse
      # on the same device as the variable.
      # TODO(apassos): figure out how to get the variable name here.
      scope_name = var.op.name if context.in_graph_mode() else ""
      with ops.name_scope("update_" + scope_name), ops.colocate_with(var):
        update_ops.append(processor.update_op(self, grad))
    if global_step is None:
      apply_updates = self._finish(update_ops, name)
    else:
      with ops.control_dependencies([self._finish(update_ops, "update")]):
        with ops.colocate_with(global_step):
          apply_updates = state_ops.assign_add(global_step, 1, name=name).op

    train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
    if apply_updates not in train_op:
      train_op.append(apply_updates)

    return apply_updates
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  # No DistributionStrategy case.
  grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
  if not grads_and_vars:
    raise ValueError("No variables provided.")
  converted_grads_and_vars = []
  for g, v in grads_and_vars:
    if g is not None:
      try:
        # Convert the grad to Tensor or IndexedSlices if necessary.
        g = ops.convert_to_tensor_or_indexed_slices(g)
      except TypeError:
        raise TypeError("Gradient must be convertible to a Tensor"
                        " or IndexedSlices, or None: %s" % g)
      if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
        raise TypeError(
            "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
    # p = _get_processor(v)
    converted_grads_and_vars.append((g, v))

  # converted_grads_and_vars = tuple(converted_grads_and_vars)
  converted_grads_and_vars = tuple(
      [(g, v) for g, v in converted_grads_and_vars if g is not None])
  var_list = [v for g, v in converted_grads_and_vars]
  cache_var_list = []
  for v in var_list:
    for c in self.cache_vars:
      if c.op.name == v.op.name + 'cache':
        cache_var_list.append(c)
        break
  assert len(var_list) == len(cache_var_list)

  if not var_list:
    raise ValueError("No gradients provided for any variable: %s." %
                     ([v.name for _, v in converted_grads_and_vars],))
  with ops.init_scope():
    self._create_slots(var_list)

  var_shapes = [v.shape for _, v in converted_grads_and_vars]
  slice_idx = np.concatenate(
      [[0], np.cumsum([np.prod(vs) for vs in var_shapes])], 0)
  # print(var_shapes)
  # print(slice_idx)

  with ops.name_scope(name, self._name) as name:
    self._prepare()
    grad_flatten = tf.concat(
        [tf.reshape(grad, [-1]) for grad, _ in converted_grads_and_vars], 0)
    KL_grad = tf.gradients(self._mean_KL, var_list)
    KL_grad_flatten = tf.concat([tf.reshape(g, [-1]) for g in KL_grad], 0)

    # calculate Hessian * x
    def Hx_fn(m):
      grads = tf.gradients(
          tf.reduce_sum(KL_grad_flatten * tf.stop_gradient(m)), var_list)
      return tf.concat([tf.reshape(g, [-1]) for g in grads], 0) + 1e-5

    x = self.cg(Hx_fn, grad_flatten)
    xHx = tf.reduce_sum(tf.transpose(x) * Hx_fn(x))
    beta = tf.sqrt(2 * self._delta_t / (xHx + 1e-8))

    def get_KL(policy):
      return tf.reduce_mean(
          tf.reduce_sum(
              tf.stop_gradient(self._policy) *
              tf.log(tf.stop_gradient(self._policy) / (policy + 1e-8) + 1e-8),
              1))

    i = tf.constant(0)

    def c(i, beta):
      with tf.control_dependencies([
          control_flow_ops.group([
              state_ops.assign(
                  var, cache_var_list[i] - beta * tf.reshape(
                      x[slice_idx[i]:slice_idx[i + 1]], var_shapes[i]))
              for i, (_, var) in enumerate(grads_and_vars)
          ])
      ]):
        kl = get_KL(self.policy_fn())
        cost = self.cost_fn()
        return tf.logical_and(
            i < self._ls_max_iter_t,
            tf.logical_or(kl > self._delta_t, cost > self._cost_before))

    b = lambda i, beta: [i + 1, self._back_trace_ratio_t * beta]
    i, _ = tf.while_loop(c, b, loop_vars=[i, beta], back_prop=False)

    var_update = tf.cond(
        tf.logical_or(
            tf.equal(i, self._ls_max_iter_t),
            tf.logical_not(tf.reduce_any(tf.is_nan(x)))),
        lambda: self.cache2var, lambda: self.var2cache)

    if not context.executing_eagerly():
      if isinstance(var_update, ops.Tensor):
        var_update = var_update.op
      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
      if var_update not in train_op:
        train_op.append(var_update)

    return var_update
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  """Apply gradients to variables.

  This is the second part of `minimize()`. It returns an `Operation` that
  applies gradients.

  Args:
    grads_and_vars: List of (gradient, variable) pairs as returned by
      `compute_gradients()`.
    global_step: Optional `Variable` to increment by one after the
      variables have been updated.
    name: Optional name for the returned operation. Defaults to the name
      passed to the `Optimizer` constructor.

  Returns:
    An `Operation` that applies the specified gradients. If `global_step`
    was not None, that operation also increments `global_step`.

  Raises:
    TypeError: If `grads_and_vars` is malformed.
    ValueError: If none of the variables have gradients.
    RuntimeError: If you should use `_distributed_apply()` instead.
  """
  # This is a default implementation of apply_gradients() that can be shared
  # by most optimizers. It relies on the subclass implementing the following
  # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().

  # TODO(isaprykin): Get rid of `has_strategy()` check by
  # always calling _distributed_apply(), using the default distribution
  # as needed.
  if distribute_ctx.has_strategy():
    # Handle DistributionStrategy case.
    if distribute_ctx.in_cross_replica_context():
      raise RuntimeError("Use `_distributed_apply()` instead of "
                         "`apply_gradients()` in a cross-replica context.")

    grads_and_vars = get_filtered_grad_fn(lambda: grads_and_vars)()
    return distribute_ctx.get_replica_context().merge_call(
        self._distributed_apply, args=(grads_and_vars, global_step, name))

  # No DistributionStrategy case.
  grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
  if not grads_and_vars:
    raise ValueError("No variables provided.")
  converted_grads_and_vars = []
  for g, v in grads_and_vars:
    if g is not None:
      try:
        # Convert the grad to Tensor or IndexedSlices if necessary.
        g = ops.convert_to_tensor_or_indexed_slices(g)
      except TypeError:
        raise TypeError("Gradient must be convertible to a Tensor"
                        " or IndexedSlices, or None: %s" % g)
      if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
        raise TypeError(
            "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
    p = _get_processor(v)
    converted_grads_and_vars.append((g, v, p))

  converted_grads_and_vars = tuple(converted_grads_and_vars)
  var_list = [v for g, v, _ in converted_grads_and_vars if g is not None]
  if not var_list:
    raise ValueError("No gradients provided for any variable: %s." %
                     ([str(v) for _, v, _ in converted_grads_and_vars],))
  with ops.init_scope():
    self._create_slots(var_list)
  update_ops = []
  with ops.name_scope(name, self._name) as name:
    self._prepare()
    for grad, var, processor in converted_grads_and_vars:
      if grad is None:
        continue
      # We colocate all ops created in _apply_dense or _apply_sparse
      # on the same device as the variable.
      # TODO(apassos): figure out how to get the variable name here.
      if context.executing_eagerly() or isinstance(
          var, resource_variable_ops.ResourceVariable
      ) and not var._in_graph_mode:  # pylint: disable=protected-access
        scope_name = ""
      else:
        scope_name = var.op.name
      with ops.name_scope("update_" + scope_name), ops.colocate_with(var):
        update_ops.append(processor.update_op(self, grad))
    if global_step is None:
      apply_updates = self._finish(update_ops, name)
    else:
      with ops.control_dependencies([self._finish(update_ops, "update")]):
        with ops.colocate_with(global_step):
          if isinstance(global_step, resource_variable_ops.ResourceVariable):
            # TODO(apassos): the implicit read in assign_add is slow; consider
            # making it less so.
            apply_updates = resource_variable_ops.assign_add_variable_op(
                global_step.handle,
                ops.convert_to_tensor(1, dtype=global_step.dtype),
                name=name)
          else:
            apply_updates = state_ops.assign_add(global_step, 1, name=name)

    if not context.executing_eagerly():
      if isinstance(apply_updates, ops.Tensor):
        apply_updates = apply_updates.op
      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
      if apply_updates not in train_op:
        train_op.append(apply_updates)

    return apply_updates
def convert(x):
  if x is None:
    return None
  return ops.convert_to_tensor_or_indexed_slices(x)
def apply_gradients(self, grads_and_vars, error, global_step=None, name=None):
  """Updates the weights of the network using an EKF step.

  Uses `grads_and_vars`, the linearized measurement matrices H paired with
  the variables, as well as the error = y_target - y_pred.

  Args:
    grads_and_vars: List of (H, variable) pairs as returned by
      `compute_gradient_Hs()`.
    error: the tensor y_target - y_pred.
    global_step: Optional `Variable` to increment by one after the
      variables have been updated.
    name: Optional name for the returned operation. Defaults to the name
      passed to the `Optimizer` constructor.

  Returns:
    An `Operation` that applies the specified gradients. If `global_step`
    was not None, that operation also increments `global_step`.

  Raises:
    TypeError: If `grads_and_vars` is malformed.
    ValueError: If none of the variables have gradients.
  """
  # This is a default implementation of apply_gradients() that can be shared
  # by most optimizers. It relies on the subclass implementing the following
  # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().
  grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
  if not grads_and_vars:
    raise ValueError("No variables provided.")
  converted_grads_and_vars = []
  for g, v in grads_and_vars:
    if g is not None:
      try:
        # Convert the grad to Tensor or IndexedSlices if necessary.
        g = ops.convert_to_tensor_or_indexed_slices(g)
      except TypeError:
        raise TypeError("Gradient must be convertible to a Tensor"
                        " or IndexedSlices, or None: %s" % g)
      if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
        raise TypeError(
            "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
    converted_grads_and_vars.append((g, v))

  converted_grads_and_vars = tuple(converted_grads_and_vars)
  var_list = [v for g, v in converted_grads_and_vars if g is not None]
  if not var_list:
    raise ValueError("No gradients provided for any variable: %s." %
                     ([str(v) for _, v in converted_grads_and_vars],))
  with ops.control_dependencies(None):
    self._create_slots(var_list)
  update_ops = []
  with ops.name_scope(name, self._name) as name:
    self._prepare()
    for grad, var in converted_grads_and_vars:
      if grad is None:
        continue
      # We colocate all ops created in _apply_dense or _apply_sparse
      # on the same device as the variable.
      with ops.name_scope("update_" + var.op.name), ops.colocate_with(var):
        update_ops.append(self._apply_dense(grad, var, error))
    if global_step is None:
      apply_updates = self._finish(update_ops, name)
    else:
      with ops.control_dependencies([self._finish(update_ops, "update")]):
        with ops.colocate_with(global_step):
          apply_updates = state_ops.assign_add(global_step, 1, name=name).op

    train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
    if apply_updates not in train_op:
      train_op.append(apply_updates)

    return apply_updates
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  """Apply gradients to variables.

  This is the second part of `minimize()`. It returns an `Operation` that
  applies gradients.

  Args:
    grads_and_vars: List of (gradient, variable) pairs as returned by
      `compute_gradients()`.
    global_step: Optional `Variable` to increment by one after the
      variables have been updated.
    name: Optional name for the returned operation. Defaults to the name
      passed to the `Optimizer` constructor.

  Returns:
    An `Operation` that applies the specified gradients. If `global_step`
    was not None, that operation also increments `global_step`.

  Raises:
    TypeError: If `grads_and_vars` is malformed.
    ValueError: If none of the variables have gradients.
  """
  # This is a default implementation of apply_gradients() that can be shared
  # by most optimizers. It relies on the subclass implementing the following
  # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().
  grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
  if not grads_and_vars:
    raise ValueError("No variables provided.")
  converted_grads_and_vars = []
  for g, v in grads_and_vars:
    if g is not None:
      try:
        # Convert the grad to Tensor or IndexedSlices if necessary.
        g = ops.convert_to_tensor_or_indexed_slices(g)
      except TypeError:
        raise TypeError("Gradient must be convertible to a Tensor"
                        " or IndexedSlices, or None: %s" % g)
      if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
        raise TypeError(
            "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
    p = _get_processor(v)
    converted_grads_and_vars.append((g, v, p))

  converted_grads_and_vars = tuple(converted_grads_and_vars)
  var_list = [v for g, v, _ in converted_grads_and_vars if g is not None]
  if not var_list:
    raise ValueError("No gradients provided for any variable: %s." %
                     ([str(v) for _, _, v in converted_grads_and_vars],))
  with ops.control_dependencies(None):
    self._create_slots([_get_variable_for(v) for v in var_list])
  update_ops = []
  with ops.name_scope(name, self._name) as name:
    self._prepare()
    for grad, var, processor in converted_grads_and_vars:
      if grad is None:
        continue
      # We colocate all ops created in _apply_dense or _apply_sparse
      # on the same device as the variable.
      # TODO (apassos): figure out how to get the variable name here.
      # id:3354 gh:3355
      scope_name = var.op.name if context.in_graph_mode() else ""
      with ops.name_scope("update_" + scope_name), ops.colocate_with(var):
        update_ops.append(processor.update_op(self, grad))
    if global_step is None:
      apply_updates = self._finish(update_ops, name)
    else:
      with ops.control_dependencies([self._finish(update_ops, "update")]):
        with ops.colocate_with(global_step):
          apply_updates = state_ops.assign_add(global_step, 1, name=name)

    if context.in_graph_mode():
      if isinstance(apply_updates, ops.Tensor):
        apply_updates = apply_updates.op
      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
      if apply_updates not in train_op:
        train_op.append(apply_updates)

    return apply_updates
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  """Apply gradients to variables.

  This is the second part of `minimize()`. It returns an `Operation` that
  applies gradients.

  Args:
    grads_and_vars: List of (gradient, variable) pairs as returned by
      `compute_gradients()`.
    global_step: Optional `Variable` to increment by one after the
      variables have been updated.
    name: Optional name for the returned operation. Defaults to the name
      passed to the `Optimizer` constructor.

  Returns:
    An `Operation` that applies the specified gradients. If `global_step`
    was not None, that operation also increments `global_step`.

  Raises:
    TypeError: If `grads_and_vars` is malformed.
    ValueError: If none of the variables have gradients.
  """
  # This is a default implementation of apply_gradients() that can be shared
  # by most optimizers. It relies on the subclass implementing the following
  # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().
  grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
  converted_grads_and_vars = []
  for g, v in grads_and_vars:
    if g is not None:
      try:
        # Convert the grad to Tensor or IndexedSlices if necessary.
        g = ops.convert_to_tensor_or_indexed_slices(g)
      except TypeError:
        raise TypeError(
            "Gradient must be convertible to a Tensor or IndexedSlices, "
            "or None: %s" % g)
      if not isinstance(g, (ops.Tensor, ops.IndexedSlices, type(None))):
        raise TypeError(
            "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
    if not isinstance(v, variables.Variable):
      raise TypeError("Variable must be a tf.Variable: %s" % v)
    converted_grads_and_vars.append((g, v))

  converted_grads_and_vars = tuple(converted_grads_and_vars)
  var_list = [v for g, v in converted_grads_and_vars if g is not None]
  if not var_list:
    raise ValueError("No gradients provided for any variable: %s" %
                     (converted_grads_and_vars,))
  with ops.control_dependencies(None):
    self._create_slots(var_list)
  update_ops = []
  with ops.name_scope(name, self._name) as name:
    self._prepare()
    for grad, var in converted_grads_and_vars:
      if grad is None:
        continue
      # We colocate all ops created in _apply_dense or _apply_sparse
      # on the same device as the variable.
      with ops.name_scope("update_" + var.op.name), ops.colocate_with(var):
        if isinstance(grad, ops.Tensor):
          update_ops.append(self._apply_dense(grad, var))
        else:
          update_ops.append(self._apply_sparse(grad, var))
    if global_step is None:
      return self._finish(update_ops, name)
    else:
      with ops.control_dependencies([self._finish(update_ops, "update")]):
        with ops.colocate_with(global_step):
          return state_ops.assign_add(global_step, 1, name=name).op