def variable_creator_scope(self, next_creator, **kwargs):
  """Creates variables & adds them to collections to match legacy code."""
  collections = kwargs.pop("collections", None)
  v = None

  # Get expected variable name.
  name = kwargs.get("name", None)
  with ops.name_scope(name, "Variable") as name_scope:
    name = name_scope

  if self._share_variables:
    v = self._variables_by_name.get(name, None)

  if v is None:
    v = next_creator(**kwargs)
    self._variables.append(v)
    if self._share_variables:
      self._variables_by_name[name] = v

  if collections is None:
    collections = [ops.GraphKeys.GLOBAL_VARIABLES]
  if v.trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections:
    collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES]
  ops.add_to_collections(collections, v)
  return v
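# How a creator method like the one above is typically installed: a minimal,
# hypothetical sketch using tf.variable_creator_scope. The VariableTracker class
# and names below are illustrative only (not from the original module), and the
# graph-collection bookkeeping is omitted for brevity.
import tensorflow as tf

class VariableTracker:
  """Hypothetical holder that records every variable created in its scope."""

  def __init__(self):
    self._variables = []

  def variable_creator_scope(self, next_creator, **kwargs):
    v = next_creator(**kwargs)   # Delegate the actual creation.
    self._variables.append(v)    # Record it for later inspection.
    return v

tracker = VariableTracker()
with tf.variable_creator_scope(tracker.variable_creator_scope):
  w = tf.Variable([1.0, 2.0], name="w")

assert tracker._variables == [w]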
def _register_variable_read(read, collections, trainable):
  """Helper function to put a read from a variable in the collections."""
  if collections is None:
    collections = []
  if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections:
    collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES]
  ops.add_to_collections(collections, read)
def var_creator(*args, **kwargs):
  """Create an AggregatingVariable and fix up collections."""
  # Record what collections this variable should be added to.
  collections = kwargs.pop("collections", None)
  if collections is None:
    collections = [ops.GraphKeys.GLOBAL_VARIABLES]
  kwargs["collections"] = []

  # Create and wrap the variable.
  v = next_creator(*args, **kwargs)
  wrapped = values.AggregatingVariable(v, aggregation)

  # Add the wrapped variable to the requested collections.
  # The handling of eager mode and the global step matches
  # ResourceVariable._init_from_args().
  if not context.executing_eagerly():
    g = ops.get_default_graph()
    # If "trainable" is True, next_creator() will add the contained
    # variable to the TRAINABLE_VARIABLES collection, so we manually
    # remove it and replace with the wrapper. We can't set "trainable"
    # to False for next_creator() since that causes functions like
    # implicit_gradients to skip those variables.
    if kwargs.get("trainable", True):
      collections.append(ops.GraphKeys.TRAINABLE_VARIABLES)
      l = g.get_collection_ref(ops.GraphKeys.TRAINABLE_VARIABLES)
      l.remove(v)
    g.add_to_collections(collections, wrapped)
  elif ops.GraphKeys.GLOBAL_STEP in collections:
    ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, wrapped)

  return wrapped
def __init__(self, initial_value, trainable=True, collections=None, validate_shape=True, name=None): """Creates a new variable with value `initial_value`. The new variable is added to the graph collections listed in `collections`, which defaults to `[GraphKeys.VARIABLES]`. If `trainable` is `True` the variable is also added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. This constructor creates both a `variable` Op and an `assign` Op to set the variable to its initial value. Args: initial_value: A `Tensor`, or Python object convertible to a `Tensor`. The initial value for the Variable. Must have a shape specified unless `validate_shape` is set to False. trainable: If `True`, the default, also adds the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as the default list of variables to use by the `Optimizer` classes. collections: List of graph collections keys. The new variable is added to these collections. Defaults to `[GraphKeys.VARIABLES]`. validate_shape: If `False`, allows the variable to be initialized with a value of unknown shape. If `True`, the default, the shape of `initial_value` must be known. name: Optional name for the variable. Defaults to `'Variable'` and gets uniquified automatically. Returns: A Variable. Raises: ValueError: If the initial value does not have a shape and `validate_shape` is `True`. """ if collections is None: collections = [ops.GraphKeys.VARIABLES] if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES] with ops.control_dependencies(None): with ops.op_scope([initial_value], name, "Variable") as name: self._initial_value = ops.convert_to_tensor(initial_value, name="initial_value") initial_value_shape = self._initial_value.get_shape() if validate_shape and not initial_value_shape.is_fully_defined(): raise ValueError("initial_value must have a shape specified: %s" % self._initial_value) shape_to_set = initial_value_shape if validate_shape else [] self._variable = state_ops.variable_op( shape_to_set, self._initial_value.dtype.base_dtype, set_shape=validate_shape, name=name) with ops.device(self._variable.device): self._initializer_op = state_ops.assign( self._variable, self._initial_value, validate_shape=validate_shape).op self._snapshot = array_ops.identity(self._variable, name="read") ops.add_to_collections(collections, self) self._save_slice_info = None
def _init_from_args(self, initial_value=None, trainable=True, collections=None, validate_shape=True, caching_device=None, name=None): """Creates a new variable from arguments. Args: initial_value: A `Tensor`, or Python object convertible to a `Tensor`. The initial value for the Variable. Must have a shape specified unless `validate_shape` is set to False. trainable: If `True`, the default, also adds the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as the default list of variables to use by the `Optimizer` classes. collections: List of graph collections keys. The new variable is added to these collections. Defaults to `[GraphKeys.VARIABLES]`. validate_shape: If `False`, allows the variable to be initialized with a value of unknown shape. If `True`, the default, the shape of `initial_value` must be known. caching_device: Optional device string or function describing where the Variable should be cached for reading. Defaults to the Variable's device. If not `None`, caches on another device. Typical use is to cache on the device where the Ops using the Variable reside, to deduplicate copying through `Switch` and other conditional statements. name: Optional name for the variable. Defaults to `'Variable'` and gets uniquified automatically. Raises: ValueError: If the initial value is not specified, or does not have a shape and `validate_shape` is `True`. """ if initial_value is None: raise ValueError("initial_value must be specified.") if collections is None: collections = [ops.GraphKeys.VARIABLES] if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES] with ops.control_dependencies(None): with ops.op_scope([initial_value], name, "Variable") as name: self._initial_value = ops.convert_to_tensor(initial_value, name="initial_value") initial_value_shape = self._initial_value.get_shape() if validate_shape and not initial_value_shape.is_fully_defined(): raise ValueError("initial_value must have a shape specified: %s" % self._initial_value) shape_to_set = initial_value_shape if validate_shape else [] self._variable = state_ops.variable_op( shape_to_set, self._initial_value.dtype.base_dtype, set_shape=validate_shape, name=name) with ops.device(self._variable.device): self._initializer_op = state_ops.assign( self._variable, self._initial_value, validate_shape=validate_shape).op with ops.device(caching_device if caching_device is not None else self._variable.device): self._snapshot = array_ops.identity(self._variable, name="read") ops.add_to_collections(collections, self) self._caching_device = caching_device self._save_slice_info = None
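# The two legacy constructors above follow the same collection convention: a new
# variable lands in the (global) variables collection and, when trainable=True,
# also in GraphKeys.TRAINABLE_VARIABLES. A small graph-mode sketch of that
# behavior, written against the modern tf.compat.v1 API rather than the legacy
# modules used above (so GLOBAL_VARIABLES instead of the old VARIABLES key).
import tensorflow.compat.v1 as tf

with tf.Graph().as_default():
  v = tf.Variable([1.0, 2.0], name="weights")               # trainable by default
  frozen = tf.Variable(3.0, trainable=False, name="frozen")  # global but not trainable

  assert v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
  assert v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
  assert frozen in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
  assert frozen not in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)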
def _register_dense_variable_read(read, collections, trainable):
  """Helper function to put a read from a dense variable in the collections."""
  if collections is None:
    collections = []
  if (trainable and
      ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES not in collections):
    collections = (list(collections) +
                   [ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES])
  ops.add_to_collections(collections, read)
def _init_from_args(self, name):
  """Initialize the CriticalSection from constructor arguments."""
  with ops.name_scope(name, "CriticalSection", []) as name:
    with ops.control_dependencies(None):
      # pylint: disable=protected-access
      handle_name = ops._name_from_scope_name(name)
      container = ops.get_default_graph()._container
      # pylint: enable=protected-access
      if container is None:
        container = ""
      self._handle = gen_resource_variable_ops.critical_section_op(
          shared_name=handle_name, name=name)
  if context.in_graph_mode():
    ops.add_to_collections(CRITICAL_SECTIONS, self)
def _init_from_args(self, name, shared_name):  # pylint: disable=invalid-name
  """Initialize the CriticalSection from constructor arguments."""
  with ops.name_scope(name, "CriticalSection", []) as name:
    with ops.init_scope():
      # pylint: disable=protected-access
      container = ops.get_default_graph()._container
      # pylint: enable=protected-access
      if shared_name is None:
        shared_name = name
      if container is None:
        container = ""
      self._handle = gen_resource_variable_ops.mutex_v2(
          shared_name=shared_name, container=container, name=name)

  if not context.executing_eagerly():
    ops.add_to_collections(CRITICAL_SECTIONS, self)
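# For orientation: the mutex resource built above backs the public
# tf.CriticalSection API, which serializes access to shared state. A short
# hedged sketch (TF2 eager assumed), closely following the documented usage.
import tensorflow as tf

v = tf.Variable(0.0)
cs = tf.CriticalSection(name="counter_lock")

def count():
  # Runs under the critical section's lock, so concurrent calls are serialized.
  v.assign_add(1.0)
  return v.read_value()

print(cs.execute(count))  # 1.0
print(cs.execute(count))  # 2.0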
def variable_creator_scope(self, next_creator, **kwargs):
  """Creates variables & adds them to collections to match legacy code."""
  v = next_creator(**kwargs)
  self._variables.append(v)

  collections = kwargs.get("collections")
  trainable = v.trainable

  if collections is None:
    collections = [ops.GraphKeys.GLOBAL_VARIABLES]
  if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections:
    collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES]
  ops.add_to_collections(collections, v)

  return v
def _init_from_args(self, name, shared_name):  # pylint: disable=invalid-name
  """Initialize the Notification from constructor arguments."""
  with ops.name_scope(name, "Notification", []) as name:
    with ops.init_scope():
      # pylint: disable=protected-access
      container = ops.get_default_graph()._container
      # pylint: enable=protected-access
      if shared_name is None:
        shared_name = name
      if container is None:
        container = ""
      # Build the notification resource outside of any control dependencies.
      with ops.control_dependencies(None):
        self._handle = gen_resource_variable_ops.notification(
            shared_name=shared_name, container=container, name=name)

  if not context.executing_eagerly():
    ops.add_to_collections(NOTIFICATIONS, self)
def collect_named_outputs(collections, alias, outputs):
  """Add `Tensor` outputs tagged with alias to collections.

  It is useful to collect end-points or tags for summaries. Example of usage:

  logits = collect_named_outputs('end_points', 'inception_v3/logits', logits)
  assert 'inception_v3/logits' in logits.aliases

  Args:
    collections: A collection or list of collections. If None skip collection.
    alias: String to append to the list of aliases of outputs, for example,
           'inception_v3/conv1'.
    outputs: Tensor, an output tensor to collect

  Returns:
    The outputs Tensor to allow inline call.
  """
  append_tensor_alias(outputs, alias)
  if collections:
    ops.add_to_collections(collections, outputs)
  return outputs
def collect_named_outputs(collections, name, outputs):
  """Add tuple (name, outputs) to collections.

  It is useful to collect end-points or tags for summaries. Example of usage:

  logits = collect_named_outputs('end_points', 'inception_v3/logits', logits)

  Args:
    collections: A collection or list of collections. If None skip collection.
    name: String, name to represent the outputs, ex. 'inception_v3/conv1'
    outputs: Tensor, an output tensor to collect

  Returns:
    The outputs Tensor to allow inline call.
  """
  if collections:
    # Remove ending '/' if present.
    if name[-1] == '/':
      name = name[:-1]
    ops.add_to_collections(collections, (name, outputs))
  return outputs
def collect_named_outputs(collections, alias, outputs):
  """Add `Tensor` outputs tagged with alias to collections.

  It is useful to collect end-points or tags for summaries. Example of usage:

  logits = collect_named_outputs('end_points', 'inception_v3/logits', logits)
  assert logits.alias == 'inception_v3/logits'

  Args:
    collections: A collection or list of collections. If None skip collection.
    alias: String, alias to name the outputs, ex. 'inception_v3/conv1'
    outputs: Tensor, an output tensor to collect

  Returns:
    The outputs Tensor to allow inline call.
  """
  # Remove ending '/' if present.
  if alias[-1] == '/':
    alias = alias[:-1]
  outputs.alias = alias
  if collections:
    ops.add_to_collections(collections, outputs)
  return outputs
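# The three collect_named_outputs variants above differ only in how the alias is
# attached (an `aliases` list, a (name, tensor) tuple, or a single `alias`
# attribute); the collection bookkeeping is the same. A hedged graph-mode sketch
# of the tuple-based variant, using plain tf.add_to_collections instead of the
# contrib helper.
import tensorflow.compat.v1 as tf

with tf.Graph().as_default():
  logits = tf.zeros([8, 10], name="inception_v3/logits")
  # Stash the (alias, tensor) pair in a named collection for later retrieval.
  tf.add_to_collections('end_points', ('inception_v3/logits', logits))

  end_points = dict(tf.get_collection('end_points'))
  assert end_points['inception_v3/logits'] is logits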
def _init_from_args(self, initial_value=None, trainable=True, collections=None, validate_shape=True, caching_device=None, name=None, dtype=None): """Creates a variable. Args: initial_value: A `Tensor`, or Python object convertible to a `Tensor`, which is the initial value for the Variable. The initial value must have a shape specified unless `validate_shape` is set to False. Can also be a callable with no argument that returns the initial value when called. (Note that initializer functions from init_ops.py must first be bound to a shape before being used here.) trainable: If `True`, the default, also adds the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as the default list of variables to use by the `Optimizer` classes. collections: List of graph collections keys. The new variable is added to these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`. validate_shape: Ignored. Provided for compatibility with tf.Variable. caching_device: Optional device string or function describing where the Variable should be cached for reading. Defaults to the Variable's device. If not `None`, caches on another device. Typical use is to cache on the device where the Ops using the Variable reside, to deduplicate copying through `Switch` and other conditional statements. name: Optional name for the variable. Defaults to `'Variable'` and gets uniquified automatically. dtype: If set, initial_value will be converted to the given type. If None, either the datatype will be kept (if initial_value is a Tensor) or float32 will be used (if it is a Python object convertible to a Tensor). Raises: ValueError: If the initial value is not specified, or does not have a shape and `validate_shape` is `True`. """ if initial_value is None: raise ValueError("initial_value must be specified.") init_from_fn = callable(initial_value) if collections is None: collections = [ops.GraphKeys.GLOBAL_VARIABLES] if not isinstance(collections, (list, tuple, set)): raise ValueError( "collections argument to Variable constructor must be a list, tuple, " "or set. Got %s of type %s" % (collections, type(collections))) if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ ops.GraphKeys.TRAINABLE_VARIABLES ] self._save_slice_info = None with ops.control_dependencies(None): with ops.name_scope( name, "Variable", [] if init_from_fn else [initial_value]) as name: # pylint: disable=protected-access true_name = ops._name_from_scope_name(name) if init_from_fn: # Use attr_scope and device(None) to simulate the behavior of # colocate_with when the variable we want to colocate with doesn't # yet exist. attr = attr_value_pb2.AttrValue( list=attr_value_pb2.AttrValue.ListValue( s=[compat.as_bytes("loc:@%s" % true_name)])) with ops.get_default_graph()._attr_scope({"_class": attr}): with ops.name_scope("Initializer"), ops.device(None): self._initial_value = ops.convert_to_tensor( initial_value(), name="initial_value", dtype=dtype) self._handle = gen_resource_variable_ops.var_handle_op( shape=self._initial_value.get_shape(), dtype=self._initial_value.dtype.base_dtype, shared_name=true_name, name=name) # pylint: enable=protected-access # Or get the initial value from a Tensor or Python object. 
else: self._initial_value = ops.convert_to_tensor( initial_value, name="initial_value", dtype=dtype) self._handle = gen_resource_variable_ops.var_handle_op( shape=self._initial_value.get_shape(), dtype=self._initial_value.dtype.base_dtype, shared_name=true_name, name=name) self._dtype = self._initial_value.dtype.base_dtype with ops.name_scope("IsInitialized"): self._is_initialized_op = ( gen_resource_variable_ops.var_is_initialized_op( self._handle)) if initial_value is not None: with ops.name_scope("Assign") as n, ops.colocate_with( self._handle): self._initialize_op = gen_resource_variable_ops.assign_variable_op( self._handle, self._initial_value, name=n) with ops.name_scope("Read"), ops.colocate_with(self._handle): # Manually assign reads to the handle's device to avoid log messages. with ops.device(self._handle.device): value = gen_resource_variable_ops.read_variable_op( self._handle, dtype=self._dtype) self._graph_element = value if caching_device is not None: # Variables may be created in a tf.device() or ops.colocate_with() # context. At the same time, users would expect caching device to be # independent of this context, and/or would not expect the current # device context to be merged with the caching device spec. # Therefore we reset the colocation stack before creating the cached # value. Note that resetting the colocation stack will also reset # the device stack. with ops.colocate_with(None, ignore_existing=True): with ops.device(caching_device): self._cached_value = array_ops.identity(value) else: self._cached_value = None ops.add_to_collections(collections, self)
def f1_micro(labels, predictions, num_classes, weights=None, metrics_collections=None, updates_collections=None, name=None): if context.executing_eagerly(): raise RuntimeError( 'tf1.f1_micro is not supported when eager execution is enabled.') with tf.variable_scope(name, 'f1_micro', (predictions, labels, weights)): predictions, labels, weights = _remove_squeezable_dimensions( predictions=tf.cast(predictions, dtype=tf.int32), labels=tf.cast(labels, dtype=tf.int32), weights=weights) tps, fps, fns = [], [], [] for class_id in range(num_classes): class_labels, class_predictions = _select_class( labels=labels, predictions=predictions, class_id=class_id) tps.append( tf.metrics.true_positives( labels=class_labels, predictions=class_predictions, weights=weights, metrics_collections=None, updates_collections=None, name='true_positives_{}'.format(class_id), )) fps.append( tf.metrics.false_positives( labels=class_labels, predictions=class_predictions, weights=weights, metrics_collections=None, updates_collections=None, name='false_positives_{}'.format(class_id), )) fns.append( tf.metrics.false_negatives( labels=class_labels, predictions=class_predictions, weights=weights, metrics_collections=None, updates_collections=None, name='false_negatives_{}'.format(class_id), )) def compute_f1_micro(_tps, _fps, _fns, _name): _precision = tf.div_no_nan( tf.add_n(_tps), tf.add_n(_tps + _fps), ) _recall = tf.div_no_nan( tf.add_n(_tps), tf.add_n(_tps + _fns), ) return 2. * tf.div_no_nan( _precision * _recall, _precision + _recall, name=_name) def once_across_towers(_, _tps, _fps, _fns): return compute_f1_micro(_tps, _fps, _fns, 'value') value = _aggregate_across_towers( metrics_collections, once_across_towers, [tp for tp, _ in tps], [fp for fp, _ in fps], [fn for fn, _ in fns], ) update_op = compute_f1_micro([tp for _, tp in tps], [fp for _, fp in fps], [fn for _, fn in fns], 'update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) return value, update_op
def _create_mirrored_variable(devices, real_mirrored_creator, *args, **kwargs): # pylint: disable=g-missing-docstring # Figure out what collections this variable should be added to. # We'll add the MirroredVariable to those collections instead. collections = kwargs.pop("collections", None) if collections is None: collections = [ops.GraphKeys.GLOBAL_VARIABLES] kwargs["collections"] = [] # Get synchronization value synchronization = kwargs.get( "synchronization", variable_scope.VariableSynchronization.ON_WRITE) if synchronization == variable_scope.VariableSynchronization.NONE: raise ValueError( "`NONE` variable synchronization mode is not " "supported with `Mirrored` distribution strategy. Please" " change the `synchronization` for variable: " + kwargs["name"]) elif synchronization == variable_scope.VariableSynchronization.ON_READ: # Variables that are to be synced on read are replica local. is_replica_local = True kwargs["trainable"] = False elif (synchronization == variable_scope.VariableSynchronization.ON_WRITE or synchronization == variable_scope.VariableSynchronization.AUTO): # `AUTO` synchronization for `MirroredStrategy` is `ON_WRITE`. is_replica_local = False else: raise ValueError("Invalid variable synchronization mode: " + synchronization + " for variable: " + kwargs["name"]) # Get aggregation value aggregation = kwargs.pop("aggregation", variable_scope.VariableAggregation.NONE) if aggregation not in ( variable_scope.VariableAggregation.NONE, variable_scope.VariableAggregation.SUM, variable_scope.VariableAggregation.MEAN, variable_scope.VariableAggregation.ONLY_FIRST_REPLICA): raise ValueError("Invalid variable aggregation mode: " + aggregation + " for variable: " + kwargs["name"]) # Ignore user-specified caching device, not needed for mirrored variables. kwargs.pop("caching_device", None) # TODO(josh11b,apassos): It would be better if variable initialization # was never recorded on the tape instead of having to do this manually # here. with tape.stop_recording(): index = real_mirrored_creator(devices, *args, **kwargs) if is_replica_local: result = values.ReplicaLocalVariable(index, index[devices[0]], aggregation) else: result = values.MirroredVariable(index, index[devices[0]], aggregation) # Add the wrapped variable to the requested collections. # The handling of eager mode and the global step matches # ResourceVariable._init_from_args(). if not context.executing_eagerly(): g = ops.get_default_graph() # If "trainable" is True, next_creator() will add the member variables # to the TRAINABLE_VARIABLES collection, so we manually remove # them and replace with the MirroredVariable. We can't set # "trainable" to False for next_creator() since that causes functions # like implicit_gradients to skip those variables. if kwargs.get("trainable", True): collections.append(ops.GraphKeys.TRAINABLE_VARIABLES) l = g.get_collection_ref(ops.GraphKeys.TRAINABLE_VARIABLES) for v in index.values(): if v in l: l.remove(v) g.add_to_collections(collections, result) elif ops.GraphKeys.GLOBAL_STEP in collections: ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, result) return result
def streaming_precision_recall_arrays(n_gbboxes, rclasses, rscores, tp_tensor, fp_tensor, remove_zero_labels=True, metrics_collections=None, updates_collections=None, name=None): """Streaming computation of precision / recall arrays. This metrics keeps tracks of boolean True positives and False positives arrays. """ with variable_scope.variable_scope( name, 'stream_precision_recall', [n_gbboxes, rclasses, tp_tensor, fp_tensor]): n_gbboxes = math_ops.to_int64(n_gbboxes) rclasses = math_ops.to_int64(rclasses) rscores = math_ops.to_float(rscores) stype = tf.int32 tp_tensor = tf.cast(tp_tensor, stype) fp_tensor = tf.cast(fp_tensor, stype) # Reshape TP and FP tensors and clean away 0 class values. rclasses = tf.reshape(rclasses, [-1]) rscores = tf.reshape(rscores, [-1]) tp_tensor = tf.reshape(tp_tensor, [-1]) fp_tensor = tf.reshape(fp_tensor, [-1]) if remove_zero_labels: mask = tf.greater(rclasses, 0) rclasses = tf.boolean_mask(rclasses, mask) rscores = tf.boolean_mask(rscores, mask) tp_tensor = tf.boolean_mask(tp_tensor, mask) fp_tensor = tf.boolean_mask(fp_tensor, mask) # Local variables accumlating information over batches. v_nobjects = _create_local('v_nobjects', shape=[], dtype=tf.int64) v_ndetections = _create_local('v_ndetections', shape=[], dtype=tf.int32) v_scores = _create_local('v_scores', shape=[ 0, ]) v_tp = _create_local('v_tp', shape=[ 0, ], dtype=stype) v_fp = _create_local('v_fp', shape=[ 0, ], dtype=stype) # Update operations. nobjects_op = state_ops.assign_add(v_nobjects, tf.reduce_sum(n_gbboxes)) ndetections_op = state_ops.assign_add( v_ndetections, tf.size(rscores, out_type=tf.int32)) scores_op = state_ops.assign(v_scores, tf.concat([v_scores, rscores], axis=0), validate_shape=False) tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp_tensor], axis=0), validate_shape=False) fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp_tensor], axis=0), validate_shape=False) # Precision and recall computations. # r = _precision_recall(nobjects_op, scores_op, tp_op, fp_op, 'value') r = _precision_recall(v_nobjects, v_ndetections, v_scores, v_tp, v_fp, 'value') with ops.control_dependencies( [nobjects_op, ndetections_op, scores_op, tp_op, fp_op]): update_op = _precision_recall(nobjects_op, ndetections_op, scores_op, tp_op, fp_op, 'update_op') # update_op = tf.Print(update_op, # [tf.reduce_sum(tf.cast(mask, tf.int64)), # tf.reduce_sum(tf.cast(mask2, tf.int64)), # tf.reduce_min(rscores), # tf.reduce_sum(n_gbboxes)], # 'Metric: ') # Some debugging stuff! # update_op = tf.Print(update_op, # [tf.shape(tp_op), # tf.reduce_sum(tf.cast(tp_op, tf.int64), axis=0)], # 'TP and FP shape: ') # update_op[0] = tf.Print(update_op, # [nobjects_op], # '# Groundtruth bboxes: ') # update_op = tf.Print(update_op, # [update_op[0][0], # update_op[0][-1], # tf.reduce_min(update_op[0]), # tf.reduce_max(update_op[0]), # tf.reduce_min(update_op[1]), # tf.reduce_max(update_op[1])], # 'Precision and recall :') if metrics_collections: ops.add_to_collections(metrics_collections, r) if updates_collections: ops.add_to_collections(updates_collections, update_op) return r, update_op
def _init_from_args(self, initial_value=None, trainable=True, collections=None, validate_shape=True, caching_device=None, name=None, dtype=None): """Creates a variable. Args: initial_value: A `Tensor`, or Python object convertible to a `Tensor`, which is the initial value for the Variable. The initial value must have a shape specified unless `validate_shape` is set to False. Can also be a callable with no argument that returns the initial value when called. (Note that initializer functions from init_ops.py must first be bound to a shape before being used here.) trainable: If `True`, the default, also adds the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as the default list of variables to use by the `Optimizer` classes. collections: List of graph collections keys. The new variable is added to these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`. validate_shape: Ignored. Provided for compatibility with tf.Variable. caching_device: Optional device string or function describing where the Variable should be cached for reading. Defaults to the Variable's device. If not `None`, caches on another device. Typical use is to cache on the device where the Ops using the Variable reside, to deduplicate copying through `Switch` and other conditional statements. name: Optional name for the variable. Defaults to `'Variable'` and gets uniquified automatically. dtype: If set, initial_value will be converted to the given type. If None, either the datatype will be kept (if initial_value is a Tensor) or float32 will be used (if it is a Python object convertible to a Tensor). Raises: ValueError: If the initial value is not specified, or does not have a shape and `validate_shape` is `True`. """ if initial_value is None: raise ValueError("initial_value must be specified.") init_from_fn = callable(initial_value) if collections is None: collections = [ops.GraphKeys.GLOBAL_VARIABLES] if not isinstance(collections, (list, tuple, set)): raise ValueError( "collections argument to Variable constructor must be a list, tuple, " "or set. Got %s of type %s" % (collections, type(collections))) if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES] self._save_slice_info = None with ops.control_dependencies(None): with ops.name_scope(name, "Variable", [] if init_from_fn else [initial_value]) as name: # pylint: disable=protected-access true_name = ops._name_from_scope_name(name) if init_from_fn: # Use attr_scope and device(None) to simulate the behavior of # colocate_with when the variable we want to colocate with doesn't # yet exist. attr = attr_value_pb2.AttrValue( list=attr_value_pb2.AttrValue.ListValue( s=[compat.as_bytes("loc:@%s" % true_name)])) with ops.get_default_graph()._attr_scope({"_class": attr}): with ops.name_scope("Initializer"), ops.device(None): self._initial_value = ops.convert_to_tensor( initial_value(), name="initial_value", dtype=dtype) self._handle = gen_resource_variable_ops.var_handle_op( shape=self._initial_value.get_shape(), dtype=self._initial_value.dtype.base_dtype, shared_name=true_name, name=name) # pylint: enable=protected-access # Or get the initial value from a Tensor or Python object. 
else: self._initial_value = ops.convert_to_tensor( initial_value, name="initial_value", dtype=dtype) self._handle = gen_resource_variable_ops.var_handle_op( shape=self._initial_value.get_shape(), dtype=self._initial_value.dtype.base_dtype, shared_name=true_name, name=name) self._dtype = self._initial_value.dtype.base_dtype with ops.name_scope("IsInitialized"): self._is_initialized_op = ( gen_resource_variable_ops.var_is_initialized_op(self._handle)) if initial_value is not None: with ops.name_scope("Assign") as n, ops.colocate_with(self._handle): self._initialize_op = gen_resource_variable_ops.assign_variable_op( self._handle, self._initial_value, name=n) with ops.name_scope("Read"), ops.colocate_with(self._handle): # Manually assign reads to the handle's device to avoid log messages. with ops.device(self._handle.device): value = gen_resource_variable_ops.read_variable_op( self._handle, dtype=self._dtype) self._graph_element = value if caching_device is not None: # Variables may be created in a tf.device() or ops.colocate_with() # context. At the same time, users would expect caching device to be # independent of this context, and/or would not expect the current # device context to be merged with the caching device spec. # Therefore we reset the colocation stack before creating the cached # value. Note that resetting the colocation stack will also reset # the device stack. with ops.colocate_with(None, ignore_existing=True): with ops.device(caching_device): self._cached_value = array_ops.identity(value) else: self._cached_value = None ops.add_to_collections(collections, self)
def _init_from_args( self, initial_value=None, trainable=None, collections=None, caching_device=None, name=None, dtype=None, constraint=None, synchronization=None, aggregation=None, distribute_strategy=None, shape=None, ): """Creates a variable. Args: initial_value: A `Tensor`, or Python object convertible to a `Tensor`, which is the initial value for the Variable. The initial value must have a shape specified unless `validate_shape` is set to False. Can also be a callable with no argument that returns the initial value when called. (Note that initializer functions from init_ops.py must first be bound to a shape before being used here.) trainable: If `True`, the default, also adds the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as the default list of variables to use by the `Optimizer` classes. Defaults to `True`, unless `synchronization` is set to `ON_READ`, in which case it defaults to `False`. collections: List of graph collections keys. The new variable is added to these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`. caching_device: Optional device string or function describing where the Variable should be cached for reading. Defaults to the Variable's device. If not `None`, caches on another device. Typical use is to cache on the device where the Ops using the Variable reside, to deduplicate copying through `Switch` and other conditional statements. name: Optional name for the variable. Defaults to `'Variable'` and gets uniquified automatically. dtype: If set, initial_value will be converted to the given type. If None, either the datatype will be kept (if initial_value is a Tensor) or float32 will be used (if it is a Python object convertible to a Tensor). constraint: An optional projection function to be applied to the variable after being updated by an `Optimizer` (e.g. used to implement norm constraints or value constraints for layer weights). The function must take as input the unprojected Tensor representing the value of the variable and return the Tensor for the projected value (which must have the same shape). Constraints are not safe to use when doing asynchronous distributed training. synchronization: Indicates when a distributed a variable will be aggregated. Accepted values are constants defined in the class `tf.VariableSynchronization`. By default the synchronization is set to `AUTO` and the current `DistributionStrategy` chooses when to synchronize. aggregation: Indicates how a distributed variable will be aggregated. Accepted values are constants defined in the class `tf.VariableAggregation`. distribute_strategy: DistributionStrategy under which this variable was created. shape: (optional) The shape of this variable. If None, the shape of `initial_value` will be used. When setting this argument to `tf.TensorShape(None)` (representing an unspecified shape), the variable can be assigned with values of different shapes. Raises: ValueError: If the initial value is not specified, or does not have a shape and `validate_shape` is `True`. @compatibility(eager) When Eager Execution is enabled, variables are never added to collections. It is not implicitly added to the `GLOBAL_VARIABLES` or `TRAINABLE_VARIABLES` collections, and the `collections` argument is ignored. 
@end_compatibility """ ( synchronization, aggregation, trainable, ) = variables.validate_synchronization_aggregation_trainable( synchronization, aggregation, trainable, name) if initial_value is None: raise ValueError("initial_value must be specified.") init_from_fn = callable(initial_value) if (isinstance(initial_value, ops.Tensor) and hasattr(initial_value, "graph") and initial_value.graph.building_function): raise ValueError( "Tensor-typed variable initializers must either be " "wrapped in an init_scope or callable " "(e.g., `tf.Variable(lambda : " "tf.truncated_normal([10, 40]))`) when building " "functions. Please file a feature request if this " "restriction inconveniences you.") if collections is None: collections = [ops.GraphKeys.GLOBAL_VARIABLES] if not isinstance(collections, (list, tuple, set)): raise ValueError( "collections argument to Variable constructor must be a list, tuple, " "or set. Got %s of type %s" % (collections, type(collections))) if constraint is not None and not callable(constraint): raise ValueError("The `constraint` argument must be a callable.") if isinstance(initial_value, trackable.CheckpointInitialValue): self._maybe_initialize_trackable() self._update_uid = initial_value.checkpoint_position.restore_uid initial_value = initial_value.wrapped_value if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ ops.GraphKeys.TRAINABLE_VARIABLES ] with ops.init_scope(): self._in_graph_mode = not context.executing_eagerly() with ops.name_scope( name, "TrainableWrapper", [] if init_from_fn else [initial_value]) as name: # pylint: disable=protected-access handle_name = ops.name_from_scope_name(name) handle_name = handle_name or "TrainableWrapperHandle" if self._in_graph_mode: shared_name = handle_name unique_id = shared_name else: # When in eager mode use a uid for the shared_name, to prevent # accidental sharing. unique_id = "%s_%d" % (handle_name, ops.uid()) shared_name = None # Never shared # Use attr_scope and device(None) to simulate the behavior of # colocate_with when the variable we want to colocate with doesn't # yet exist. device_context_manager = (ops.device if self._in_graph_mode else ops.NullContextmanager) attr = attr_value_pb2.AttrValue( list=attr_value_pb2.AttrValue.ListValue( s=[compat.as_bytes("loc:@%s" % handle_name)])) with ops.get_default_graph()._attr_scope({"_class": attr}): with ops.name_scope("Initializer"), device_context_manager( None): initial_value = ops.convert_to_tensor( initial_value() if init_from_fn else initial_value, name="initial_value", dtype=dtype, ) if shape is None: shape = initial_value.shape handle = resource_variable_ops.eager_safe_variable_handle( initial_value=initial_value, shape=None, # shape, shared_name=shared_name, name=name, graph_mode=self._in_graph_mode, ) # pylint: disable=protected-access if (self._in_graph_mode and initial_value is not None and initial_value.op._get_control_flow_context() is not None): raise ValueError( "Initializer for variable %s is from inside a control-flow " "construct, such as a loop or conditional. When creating a " "variable inside a loop or conditional, use a lambda as the " "initializer." % name) # pylint: enable=protected-access dtype = initial_value.dtype.base_dtype if self._in_graph_mode: with ops.name_scope("IsInitialized"): is_initialized_op = (gen_resource_variable_ops. 
var_is_initialized_op(handle)) if initial_value is not None: # pylint: disable=g-backslash-continuation with ops.name_scope("Assign") as n, ops.colocate_with( None, ignore_existing=True), ops.device( handle.device): # pylint: disable=protected-access initializer_op = gen_resource_variable_ops.assign_variable_op( handle, variables. _try_guard_against_uninitialized_dependencies( name, initial_value), name=n, ) # pylint: enable=protected-access # pylint: enable=g-backslash-continuation with ops.name_scope("Read"): # Manually assign reads to the handle's device to avoid log # messages. with ops.device(handle.device): with ops.control_dependencies([ gen_resource_variable_ops. assign_variable_op( handle, self.prefetch_values(), name="AssignBeforeInitRead", ) ]): value = gen_resource_variable_ops.read_variable_op( handle, dtype) graph_element = value if caching_device is not None: # Variables may be created in a tf.device() or ops.colocate_with() # context. At the same time, users would expect caching device to # be independent of this context, and/or would not expect the # current device context to be merged with the caching device # spec. Therefore we reset the colocation stack before creating # the cached value. Note that resetting the colocation stack will # also reset the device stack. with ops.colocate_with(None, ignore_existing=True): with ops.device(caching_device): cached_value = array_ops.identity(value) else: cached_value = None else: gen_resource_variable_ops.assign_variable_op( handle, initial_value) is_initialized_op = None initializer_op = None graph_element = None if caching_device: with ops.device(caching_device): with ops.control_dependencies([ gen_resource_variable_ops. assign_variable_op( handle, self.prefetch_values(), name="AssignBeforeInitRead", ) ]): cached_value = ( gen_resource_variable_ops.read_variable_op( handle, dtype)) else: cached_value = None if not context.executing_eagerly(): # Eager variables are only added to collections if they are part of an # eager variable store (otherwise in an interactive session they would # hog memory and cause OOM). This is done in ops/variable_scope.py. ops.add_to_collections(collections, self) elif ops.GraphKeys.GLOBAL_STEP in collections: ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, self) initial_value = initial_value if self._in_graph_mode else None super(resource_variable_ops.ResourceVariable, self).__init__( trainable=trainable, shape=shape, dtype=dtype, handle=handle, synchronization=synchronization, constraint=constraint, aggregation=aggregation, distribute_strategy=distribute_strategy, name=name, unique_id=unique_id, handle_name=handle_name, graph_element=graph_element, initial_value=initial_value, initializer_op=initializer_op, is_initialized_op=is_initialized_op, cached_value=cached_value, )
def f1_score(labels, predictions, weights=None, num_thresholds=200, metrics_collections=None, updates_collections=None, name=None): """Computes the approximately best F1-score across different thresholds. The f1_score function applies a range of thresholds to the predictions to convert them from [0, 1] to bool. Precision and recall are computed by comparing them to the labels. The F1-Score is then defined as 2 * precision * recall / (precision + recall). The best one across the thresholds is returned. Disclaimer: In practice it may be desirable to choose the best threshold on the validation set and evaluate the F1 score with this threshold on a separate test set. Or it may be desirable to use a fixed threshold (e.g. 0.5). This function internally creates four local variables, `true_positives`, `true_negatives`, `false_positives` and `false_negatives` that are used to compute the pairs of recall and precision values for a linearly spaced set of thresholds from which the best f1-score is derived. This value is ultimately returned as `f1-score`, an idempotent operation that computes the F1-score (computed using the aforementioned variables). The `num_thresholds` variable controls the degree of discretization with larger numbers of thresholds more closely approximating the true best F1-score. For estimation of the metric over a stream of data, the function creates an `update_op` operation that updates these variables and returns the F1-score. Example usage with a custom estimator: def model_fn(features, labels, mode): predictions = make_predictions(features) loss = make_loss(predictions, labels) train_op = tf.contrib.training.create_train_op( total_loss=loss, optimizer='Adam') eval_metric_ops = {'f1': f1_score(labels, predictions)} return tf.estimator.EstimatorSpec( mode=mode, predictions=predictions, loss=loss, train_op=train_op, eval_metric_ops=eval_metric_ops, export_outputs=export_outputs) estimator = tf.estimator.Estimator(model_fn=model_fn) If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: labels: A `Tensor` whose shape matches `predictions`. Will be cast to `bool`. predictions: A floating point `Tensor` of arbitrary shape and whose values are in the range `[0, 1]`. weights: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` dimension). num_thresholds: The number of thresholds to use when discretizing the roc curve. metrics_collections: An optional list of collections that `f1_score` should be added to. updates_collections: An optional list of collections that `update_op` should be added to. name: An optional variable_scope name. Returns: f1_score: A scalar `Tensor` representing the current best f1-score across different thresholds. update_op: An operation that increments the `true_positives`, `true_negatives`, `false_positives` and `false_negatives` variables appropriately and whose value matches the `f1_score`. Raises: ValueError: If `predictions` and `labels` have mismatched shapes, or if `weights` is not `None` and its shape doesn't match `predictions`, or if either `metrics_collections` or `updates_collections` are not a list or tuple. 
""" with variable_scope.variable_scope(name, 'f1', (labels, predictions, weights)): predictions, labels, weights = metrics_impl._remove_squeezable_dimensions( # pylint: disable=protected-access predictions=predictions, labels=labels, weights=weights) # To account for floating point imprecisions / avoid division by zero. epsilon = 1e-7 thresholds = [(i + 1) * 1.0 / (num_thresholds - 1) for i in range(num_thresholds - 2)] thresholds = [0.0 - epsilon] + thresholds + [1.0 + epsilon] # Confusion matrix. values, update_ops = metrics_impl._confusion_matrix_at_thresholds( # pylint: disable=protected-access labels, predictions, thresholds, weights, includes=('tp', 'fp', 'fn')) # Compute precision and recall at various thresholds. def compute_best_f1_score(tp, fp, fn, name): precision_at_t = math_ops.div(tp, epsilon + tp + fp, name='precision_' + name) recall_at_t = math_ops.div(tp, epsilon + tp + fn, name='recall_' + name) # Compute F1 score. f1_at_thresholds = (2.0 * precision_at_t * recall_at_t / (precision_at_t + recall_at_t + epsilon)) return math_ops.reduce_max(f1_at_thresholds) def f1_across_towers(_, values): best_f1 = compute_best_f1_score(tp=values['tp'], fp=values['fp'], fn=values['fn'], name='value') if metrics_collections: ops.add_to_collections(metrics_collections, best_f1) return best_f1 best_f1 = distribute_lib.get_tower_context().merge_call( f1_across_towers, values) update_op = compute_best_f1_score(tp=update_ops['tp'], fp=update_ops['fp'], fn=update_ops['fn'], name='update') if updates_collections: ops.add_to_collections(updates_collections, update_op) return best_f1, update_op
def f1_across_replicas(_, values):
  best_f1 = compute_best_f1_score(tp=values['tp'], fp=values['fp'],
                                  fn=values['fn'], name='value')
  if metrics_collections:
    ops.add_to_collections(metrics_collections, best_f1)
  return best_f1
def recall(labels, predictions, weights=None, metrics_collections=None,
           updates_collections=None, name=None):
  """Computes the recall of the predictions with respect to the labels.

  The `recall` function creates two local variables, `true_positives` and
  `false_negatives`, that are used to compute the recall. This value is
  ultimately returned as `recall`, an idempotent operation that simply divides
  `true_positives` by the sum of `true_positives` and `false_negatives`.

  For estimation of the metric over a stream of data, the function creates an
  `update_op` that updates these variables and returns the `recall`.
  `update_op` weights each prediction by the corresponding value in `weights`.

  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

  Args:
    labels: The ground truth values, a `Tensor` whose dimensions must match
      `predictions`. Will be cast to `bool`.
    predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will
      be cast to `bool`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `labels` dimension).
    metrics_collections: An optional list of collections that `recall` should
      be added to.
    updates_collections: An optional list of collections that `update_op` should
      be added to.
    name: An optional variable_scope name.

  Returns:
    recall: Scalar float `Tensor` with the value of `true_positives` divided
      by the sum of `true_positives` and `false_negatives`.
    update_op: `Operation` that increments `true_positives` and
      `false_negatives` variables appropriately and whose value matches
      `recall`.

  Raises:
    ValueError: If `predictions` and `labels` have mismatched shapes, or if
      `weights` is not `None` and its shape doesn't match `predictions`, or if
      either `metrics_collections` or `updates_collections` are not a list or
      tuple.
    RuntimeError: If eager execution is enabled.
  """
  if context.in_eager_mode():
    raise RuntimeError('tf.metrics.recall is not supported '
                       'when eager execution is enabled.')

  with variable_scope.variable_scope(name, 'recall',
                                     (predictions, labels, weights)):
    predictions, labels, weights = _remove_squeezable_dimensions(
        predictions=math_ops.cast(predictions, dtype=dtypes.bool),
        labels=math_ops.cast(labels, dtype=dtypes.bool),
        weights=weights)

    true_p, true_positives_update_op = true_positives(
        labels, predictions, weights, metrics_collections=None,
        updates_collections=None, name=None)
    false_n, false_negatives_update_op = false_negatives(
        labels, predictions, weights, metrics_collections=None,
        updates_collections=None, name=None)

    def compute_recall(true_p, false_n, name):
      return array_ops.where(math_ops.greater(true_p + false_n, 0),
                             math_ops.div(true_p, true_p + false_n), 0, name)

    update_op = compute_recall(true_positives_update_op,
                               false_negatives_update_op, 'update_op')
    with tf.control_dependencies([update_op]):
      rec = compute_recall(true_p, false_n, 'value')

    if metrics_collections:
      ops.add_to_collections(metrics_collections, rec)
    if updates_collections:
      ops.add_to_collections(updates_collections, update_op)

    return rec, update_op
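# The metrics_collections / updates_collections arguments above let callers have
# the metric value and its update op registered in graph collections instead of
# threading them through return values. A hedged sketch of that pattern with the
# core API.
import tensorflow.compat.v1 as tf

with tf.Graph().as_default():
  labels = tf.constant([True, False, True])
  preds = tf.constant([True, True, True])
  rec, rec_update = tf.metrics.recall(
      labels, preds,
      metrics_collections=['my_metrics'],
      updates_collections=[tf.GraphKeys.UPDATE_OPS])

  # Everything registered above can be recovered from the collections alone.
  assert tf.get_collection('my_metrics') == [rec]
  all_updates = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS))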
def __init__( self, key_dtype=dtypes.int64, value_dtype=dtypes.float32, dim=1, devices=None, partitioner=default_partition_fn, shared_name=None, name="DynamicEmbedding_Variable", initializer=None, trainable=True, checkpoint=True, init_size=0, restrict_policy=None, ): """Creates an empty `Variable` object. Creates a group of tables placed on devices, the type of its keys and values are specified by key_dtype and value_dtype, respectively. The environment variables 'TF_HASHTABLE_INIT_SIZE' can be used to set the inital size of each tables, which can help reduce rehash times. The default initial table size : 1,048,576 for CPU, 16,777,216 for GPU. Args: key_dtype: the type of the key tensors. value_dtype: the type of the value tensors. dim: the length of the value array for each key. devices: the list of devices holding the tables. One table will be created on each device. partitioner: partition function of keys, return the partition index for each key. Example partition func: ```python def default_partition_fn(keys, shard_num): return tf.cast(keys % shard_num, dtype=tf.int32) ``` shared_name: No used. name: A name for the operation (optional). initializer: The value to use if a key is missing in the hash table. which can be a python number, numpy array or `tf.initializer` instances. If initializer is `None` (the default), `0` will be taken. trainable: True, will be treated as a trainable Variable, and add to to the list of variables collected in the graph under the key `GraphKeys.TRAINABLE_VARIABLES`. checkpoint: if True, the contents of the SparseVariable are saved to and restored from checkpoints. If `shared_name` is empty for a checkpointed table, it is shared using the table node name. init_size: initial size for the Variable and initial size of each hash tables will be int(init_size / N), N is the number of the devices. restrict_policy: a restrict policy to specify the rule to restrict the size of variable. If in training program, the variable is updated by optimizer, then the sparse slot variables in optimizer are also be restricted. Returns: A `Variable` object. 
""" self.key_dtype = key_dtype self.value_dtype = value_dtype self.dim = dim def _get_default_devices(): gpu_list = [ x.name for x in device_lib.list_local_devices() if x.device_type == "GPU" ] return gpu_list[0:1] or [ "/CPU:0", ] devices_ = devices or _get_default_devices() self.devices = (devices_ if isinstance(devices_, list) else [ devices, ]) self.partition_fn = partitioner self.name = name self.shared_name = shared_name or "shared_name.{}".format(name) self.initializer = None self.trainable = trainable self.checkpoint = checkpoint self._tables = [] self.size_ops = [] self.shard_num = len(self.devices) self.init_size = int(init_size / self.shard_num) if restrict_policy is not None: if not issubclass(restrict_policy, de.RestrictPolicy): raise TypeError( 'restrict_policy must be subclass of RestrictPolicy.') self._restrict_policy = restrict_policy(self) else: self._restrict_policy = None key_dtype_list = [dtypes.int32, dtypes.int64] value_dtype_list = [ dtypes.int32, dtypes.int64, dtypes.bool, dtypes.float32, dtypes.float64, dtypes.half, dtypes.int8, ] if "GPU" in self.devices[0].upper(): key_dtype_list = [dtypes.int64] value_dtype_list = [ dtypes.int32, dtypes.float32, dtypes.half, dtypes.int8 ] if key_dtype not in key_dtype_list: raise TypeError("key_dtype should be ", key_dtype_list) if value_dtype not in value_dtype_list: raise TypeError("value_dtype should be ", value_dtype_list) _initializer = initializer if _initializer is None: _initializer = init_ops.zeros_initializer(dtype=self.value_dtype) static_default_value = self._convert_anything_to_init( _initializer, dim) scope_name = self.name.split("/")[-1] with ops.name_scope(scope_name, "DynamicEmbedding_Variable"): with ops.colocate_with(None, ignore_existing=True): for idx in range(len(self.devices)): with ops.device(self.devices[idx]): mht = None mht = de.CuckooHashTable( key_dtype=self.key_dtype, value_dtype=self.value_dtype, default_value=static_default_value, name=self._make_name(idx), checkpoint=self.checkpoint, init_size=self.init_size, ) self._tables.append(mht) super(Variable, self).__init__() ops.add_to_collection(de.GraphKeys.DYNAMIC_EMBEDDING_VARIABLES, self) if trainable: ops.add_to_collections( de.GraphKeys.TRAINABLE_DYNAMIC_EMBEDDING_VARIABLES, self)
def _streaming_tp_fp_array(num_gt_boxes, tp, fp, scores, class_name, remove_zero_scores=True, metrics_collections=None, updates_collections=None, name=None): """Streaming computation of True Positive and False Positive arrays. This metrics also keeps track of scores and number of grountruth objects. """ default_name = 'streaming_tp_fp_{}'.format(class_name) # Input Tensors... with variable_scope.variable_scope(name, default_name, [num_gt_boxes, tp, fp, scores]): tp = tf.cast(tp, tf.bool) fp = tf.cast(fp, tf.bool) scores = tf.to_float(scores) num_gt_boxes = tf.to_int64(num_gt_boxes) # Reshape TP and FP tensors and clean away 0 class values. tp = tf.reshape(tp, [-1]) fp = tf.reshape(fp, [-1]) scores = tf.reshape(scores, [-1]) # Remove TP and FP both false. if remove_zero_scores: mask = tf.logical_or(tp, fp) rm_threshold = 1e-4 mask = tf.logical_and(mask, tf.greater(scores, rm_threshold)) tp = tf.boolean_mask(tp, mask) fp = tf.boolean_mask(fp, mask) scores = tf.boolean_mask(scores, mask) # Local variables accumlating information over batches. tp_value = metrics_impl._create_local('tp_value', shape=[ 0, ], dtype=tf.bool, validate_shape=False) fp_value = metrics_impl._create_local('fp_value', shape=[ 0, ], dtype=tf.bool, validate_shape=False) scores_value = metrics_impl._create_local('scores_value', shape=[ 0, ], validate_shape=False) num_gt_boxes_value = metrics_impl._create_local('num_gt_boxes_value', shape=[], dtype=tf.int64) # Update operations. tp_op = tf.assign(tp_value, tf.concat([tp_value, tp], axis=0), validate_shape=False) fp_op = tf.assign(fp_value, tf.concat([fp_value, fp], axis=0), validate_shape=False) scores_op = tf.assign(scores_value, tf.concat([scores_value, scores], axis=0), validate_shape=False) num_gt_boxes_op = tf.assign_add(num_gt_boxes_value, num_gt_boxes) # Value and update ops. values = (tp_value, fp_value, scores_value, num_gt_boxes_value) update_ops = (tp_op, fp_op, scores_op, num_gt_boxes_op) if metrics_collections: ops.add_to_collections(metrics_collections, values) if updates_collections: ops.add_to_collections(updates_collections, update_ops) update_op = tf.group(*update_ops) return values, update_op
def official_batch_norm(inputs, channels, type=False, decay=0.999, center=True, scale=False, epsilon=0.001, activation_fn=None, updates_collections=ops.GraphKeys.UPDATE_OPS, is_training=True, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, scope=None): """ Args: inputs: a tensor of size `[batch_size, height, width, channels]` or `[batch_size, channels]`. type: False is non-convolution batch norm,True is convolution batch norm. decay: decay for the moving average. center: If True, subtract `beta`. If False, `beta` is ignored. scale: If True, multiply by `gamma`. If False, `gamma` is not used. When the next layer is linear (also e.g. `nn.relu`), this can be disabled since the scaling can be done by the next layer. epsilon: small float added to variance to avoid dividing by zero. activation_fn: Optional activation function. updates_collections: collections to collect the update ops for computation. is_training: whether or not the layer is in training mode. reuse: whether or not the layer and its variables should be reused. variables_collections: optional collections for the variables. outputs_collections: collections to add the outputs. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for `variable_op_scope`. Returns: a tensor representing the output of the operation. """ with variable_scope.variable_scope(scope, 'BatchNorm', [inputs], reuse=reuse) as sc: dtype = inputs.dtype.base_dtype axis = [0, 1, 2] if type else [0] params_shape = [channels] # Allocate parameters for the beta and gamma of the normalization. beta, gamma = None, None # param_initializers = {} if center: beta_collections = utils.get_variable_collections( variables_collections, 'beta') # beta_initializer = param_initializers.get('beta',init_ops.zeros_initializer) beta = variables.model_variable( 'beta', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer, collections=beta_collections, trainable=trainable) if scale: gamma_collections = utils.get_variable_collections( variables_collections, 'gamma') # gamma_initializer = param_initializers.get('gamma',init_ops.ones_initializer()) gamma = variables.model_variable('gamma', shape=params_shape, dtype=dtype, initializer=tf.ones_initializer(), collections=gamma_collections, trainable=trainable) # Create moving_mean and moving_variance variables and add them to the # appropiate collections. moving_mean_collections = utils.get_variable_collections( variables_collections, 'moving_mean') # moving_mean_initializer = param_initializers.get('moving_mean', init_ops.zeros_initializer) moving_mean = variables.model_variable( 'moving_mean', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer, trainable=False, collections=moving_mean_collections) moving_variance_collections = utils.get_variable_collections( variables_collections, 'moving_variance') # moving_variance_initializer = param_initializers.get('moving_variance', init_ops.ones_initializer()) moving_variance = variables.model_variable( 'moving_variance', shape=params_shape, dtype=dtype, initializer=tf.ones_initializer(), trainable=False, collections=moving_variance_collections) if is_training: # Calculate the moments based on the individual batch. mean, variance = nn.moments(inputs, axis, shift=moving_mean) # Update the moving_mean and moving_variance moments. 
update_moving_mean = moving_averages.assign_moving_average( moving_mean, mean, decay) update_moving_variance = moving_averages.assign_moving_average( moving_variance, variance, decay) if updates_collections is None: # Make sure the updates are computed here. with ops.control_dependencies( [update_moving_mean, update_moving_variance]): outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) else: # Collect the updates to be computed later. ops.add_to_collections(updates_collections, update_moving_mean) ops.add_to_collections(updates_collections, update_moving_variance) outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) else: outputs = nn.batch_normalization(inputs, moving_mean, moving_variance, beta, gamma, epsilon) # TODO:shape # outputs.set_shape(inputs.get_shape()) if activation_fn: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
def batch_norm(inputs, decay=0.999, center=True, scale=False, epsilon=0.001, updates_collections=ops.GraphKeys.UPDATE_OPS, is_training=True, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, scope=None): """Code modification of tensorflow/contrib/layers/python/layers/layers.py """ with variable_scope.variable_op_scope([inputs], scope, 'BatchNorm', reuse=reuse) as sc: inputs = ops.convert_to_tensor(inputs) inputs_shape = inputs.get_shape() inputs_rank = inputs_shape.ndims if inputs_rank is None: raise ValueError('Inputs %s has undefined rank.' % inputs.name) dtype = inputs.dtype.base_dtype axis = list(range(inputs_rank - 1)) params_shape = inputs_shape[-1:] if not params_shape.is_fully_defined(): raise ValueError('Inputs %s has undefined last dimension %s.' % (inputs.name, params_shape)) # Allocate parameters for the beta and gamma of the normalization. beta, gamma = None, None if center: beta_collections = utils.get_variable_collections( variables_collections, 'beta') beta = variables.model_variable( 'beta', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer, collections=beta_collections, trainable=trainable) if scale: gamma_collections = utils.get_variable_collections( variables_collections, 'gamma') gamma = variables.model_variable( 'gamma', shape=params_shape, dtype=dtype, initializer=init_ops.ones_initializer, collections=gamma_collections, trainable=trainable) # Create moving_mean and moving_variance variables and add them to the # appropiate collections. moving_mean_collections = utils.get_variable_collections( variables_collections, 'moving_mean') moving_mean = variables.model_variable( 'moving_mean', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer, trainable=False, collections=moving_mean_collections) moving_variance_collections = utils.get_variable_collections( variables_collections, 'moving_variance') moving_variance = variables.model_variable( 'moving_variance', shape=params_shape, dtype=dtype, initializer=init_ops.ones_initializer, trainable=False, collections=moving_variance_collections) # Calculate the moments based on the individual batch. mean, variance = nn.moments(inputs, axis, shift=moving_mean) # Update the moving_mean and moving_variance moments. update_moving_mean = moving_averages.assign_moving_average( moving_mean, mean, decay) update_moving_variance = moving_averages.assign_moving_average( moving_variance, variance, decay) if updates_collections is None: # Make sure the updates are computed here. with ops.control_dependencies( [update_moving_mean, update_moving_variance]): outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) else: # Collect the updates to be computed later. ops.add_to_collections(updates_collections, update_moving_mean) ops.add_to_collections(updates_collections, update_moving_variance) outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) test_outputs = nn.batch_normalization(inputs, moving_mean, moving_variance, beta, gamma, epsilon) outputs = tf.cond(is_training, lambda: outputs, lambda: test_outputs) outputs.set_shape(inputs_shape) return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
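Unlike the variant above it that branches on a Python bool, this version routes `is_training` through `tf.cond`, so it must be a boolean tensor (e.g. a placeholder). A self-contained sketch of that selection pattern on toy tensors, not the batch-norm layer itself:

```python
import tensorflow as tf

is_training = tf.placeholder(tf.bool, shape=[], name="is_training")
batch_mean = tf.constant(0.1)                    # stands in for the nn.moments output
moving_mean = tf.Variable(0.0, trainable=False)  # stands in for the moving average

# tf.cond picks the batch statistic during training and the moving average otherwise.
mean = tf.cond(is_training, lambda: batch_mean, lambda: tf.identity(moving_mean))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(mean, feed_dict={is_training: True}))   # 0.1
    print(sess.run(mean, feed_dict={is_training: False}))  # 0.0
```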
def execute(self, fn, *args, **kwargs): """Execute function `fn(*args, **kwargs)` inside the CriticalSection. Args: fn: The function to execute. Must return at least one tensor. *args: Additional positional arguments to `fn`. **kwargs: Additional keyword arguments to `fn`. Several keywords are reserved for `execute`. These are: - name; The name to use when creating the execute operation. - exclusive_resource_access; Whether the resources required by `fn` should be exclusive to this `CriticalSection`. Default: `True`. You may want to set this to `False` if you will be accessing a resource in read-only mode in two different CriticalSections. Returns: The tensors returned from `fn(*args, **kwargs)`. Raises: ValueError: If `fn` attempts to use this `CriticalSection` in any nested way. ValueError: If `exclusive_resource_access` is not provided (is `True`) and another `CriticalSection` has an execution requesting the same resources as in `*args`, `**kwargs`, and any additionaly captured inputs in `fn`. Note, even if `exclusive_resource_access` is `True`, if another execution in another `CriticalSection` was created without `exclusive_resource_access=True`, a `ValueError` will be raised. """ name = kwargs.pop("name", None) exclusive_resource_access = kwargs.pop("exclusive_resource_access", True) with ops.name_scope(name, "critical_section_execute", []): lock = gen_resource_variable_ops.mutex_lock(self._handle) with ops.control_dependencies([lock]): c_known_ops = set() c_captured_tensors = set() def add_op_internal(op): c_known_ops.add(op) for i in op.inputs: if i.op not in c_known_ops: c_captured_tensors.add(i) c = function.HelperContext(add_op_internal) with c: r = fn(*args, **kwargs) resource_inputs = set([ x for x in list(nest.flatten(args)) + nest.flatten(kwargs.values()) + list(c_captured_tensors) if tensor_util.is_tensor(x) and x.dtype == dtypes.resource]) if self._handle in resource_inputs: raise ValueError("The function fn attempts to access the " "CriticalSection in which it would be running. " "This is illegal and would cause deadlocks. " "CriticalSection: %s." % self._handle) if not context.executing_eagerly(): # Collections and op introspection does not work in eager # mode. This is generally ok; since eager mode (as of # writing) executes sequentially anyway. for sg in ops.get_collection(CRITICAL_SECTION_EXECUTIONS): sg_handle_name = ops.convert_to_tensor(sg.handle).name self_handle_name = ops.convert_to_tensor(self._handle).name if sg_handle_name == self_handle_name: # Other executions in the same critical section are allowed. continue if not (exclusive_resource_access or sg.exclusive_resource_access): # Neither execution requested exclusive access. continue resource_intersection = resource_inputs.intersection(sg.resources) if resource_intersection: raise ValueError( "This execution would access resources: %s. Either this " "lock (CriticalSection: %s) or lock '%s' " "(CriticalSection: %s) requested exclusive resource access " "of this resource. Did you mean to call execute with keyword " "argument exclusive_resource_access=False?" 
% (list(resource_intersection), self._handle.name, sg.op.name, sg.handle.name)) def identity(x): # pylint: disable=invalid-name if isinstance(x, tensor_array_ops.TensorArray): return x.identity() elif isinstance(x, ops.Operation): return control_flow_ops.group(x) elif context.executing_eagerly() and x is None: return None else: return array_ops.identity(x) r_flat = [identity(x) for x in nest.flatten(r)] with ops.control_dependencies(r_flat): # The identity must run on the same machine as self._handle with ops.colocate_with(self._handle): # Do not use array_ops.identity as there are special # optimizations within TensorFlow which seem to elide it # even when optimizations are disabled(!). ensure_lock_exists = gen_resource_variable_ops.consume_mutex_lock( lock) # Make sure that if any element of r is accessed, all of # them are executed together. r = nest.pack_sequence_as( r, control_flow_ops.tuple(nest.flatten(r))) with ops.control_dependencies([ensure_lock_exists]): outputs = nest.map_structure(identity, r) if not context.executing_eagerly(): signature = _ExecutionSignature( op=lock.op, handle=self._handle, resources=list(resource_inputs), exclusive_resource_access=exclusive_resource_access) ops.add_to_collections( CRITICAL_SECTION_EXECUTIONS, signature) return outputs
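This method backs the `tf.CriticalSection` API exported in later TF 1.x releases; assuming such a release, the sketch below shows the documented counter pattern, where the read and the increment are serialized under the section's lock and the execution is recorded in `CRITICAL_SECTION_EXECUTIONS` when building a graph:

```python
import tensorflow as tf

cs = tf.CriticalSection()
v = tf.get_variable("counter", initializer=0.0, use_resource=True)

def count():
    # Read, then increment, strictly in that order, all inside the critical section.
    value = v.read_value()
    with tf.control_dependencies([value]):
        with tf.control_dependencies([v.assign_add(1.0)]):
            return tf.identity(value)

out = cs.execute(count)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(out))  # 0.0 on the first run, 1.0 on the next
```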
def mean_iou(labels, predictions, num_classes, weights=None, metrics_collections=None, updates_collections=None, name=None): """Calculate per-step mean Intersection-Over-Union (mIOU). Mean Intersection-Over-Union is a common evaluation metric for semantic image segmentation, which first computes the IOU for each semantic class and then computes the average over classes. IOU is defined as follows: IOU = true_positive / (true_positive + false_positive + false_negative). The predictions are accumulated in a confusion matrix, weighted by `weights`, and mIOU is then calculated from it. For estimation of the metric over a stream of data, the function creates an `update_op` operation that updates these variables and returns the `mean_iou`. If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: labels: A `Tensor` of ground truth labels with shape [batch size] and of type `int32` or `int64`. The tensor will be flattened if its rank > 1. predictions: A `Tensor` of prediction results for semantic labels, whose shape is [batch size] and type `int32` or `int64`. The tensor will be flattened if its rank > 1. num_classes: The possible number of labels the prediction task can have. This value must be provided, since a confusion matrix of dimension = [num_classes, num_classes] will be allocated. weights: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` dimension). metrics_collections: An optional list of collections that `mean_iou` should be added to. updates_collections: An optional list of collections `update_op` should be added to. name: An optional variable_scope name. Returns: mean_iou: A `Tensor` representing the mean intersection-over-union. iou: A `Tensor` with the per-class intersection-over-union values. update_op: An operation that increments the confusion matrix. reset_cm_op: An operation that resets the accumulated confusion matrix to zeros. Raises: ValueError: If `predictions` and `labels` have mismatched shapes, or if `weights` is not `None` and its shape doesn't match `predictions`, or if either `metrics_collections` or `updates_collections` are not a list or tuple. """ with variable_scope.variable_scope( name, 'mean_iou', (predictions, labels, weights)): # Check if shape is compatible. predictions.get_shape().assert_is_compatible_with(labels.get_shape()) total_cm, update_op = _streaming_confusion_matrix(labels, predictions, num_classes, weights) reset_cm_op = tf.assign(total_cm, tf.zeros_like(total_cm, total_cm.dtype, 'reset_cm')) def compute_mean_iou(name): """Compute the mean intersection-over-union via the confusion matrix.""" sum_over_row = math_ops.to_float(math_ops.reduce_sum(total_cm, 0)) sum_over_col = math_ops.to_float(math_ops.reduce_sum(total_cm, 1)) cm_diag = math_ops.to_float(array_ops.diag_part(total_cm)) denominator = sum_over_row + sum_over_col - cm_diag # If the value of the denominator is 0, set it to 1 to avoid # zero division. denominator = array_ops.where( math_ops.greater(denominator, 0), denominator, array_ops.ones_like(denominator)) iou = math_ops.div(cm_diag, denominator) return math_ops.reduce_mean(iou, name=name), iou mean_iou_v, iou = compute_mean_iou('mean_iou') if metrics_collections: ops.add_to_collections(metrics_collections, mean_iou_v) if updates_collections: ops.add_to_collections(updates_collections, update_op) return mean_iou_v, iou, update_op, reset_cm_op
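A worked numpy example of the per-class IOU computed inside `compute_mean_iou` above, starting from a 2-class confusion matrix (rows are labels, columns are predictions):

```python
import numpy as np

cm = np.array([[3., 1.],
               [0., 2.]])
sum_over_row = cm.sum(axis=0)   # predictions per class
sum_over_col = cm.sum(axis=1)   # ground-truth labels per class
cm_diag = np.diag(cm)           # true positives per class
denominator = sum_over_row + sum_over_col - cm_diag
denominator = np.where(denominator > 0, denominator, 1)  # avoid division by zero

iou = cm_diag / denominator
print(iou)         # [0.75       0.66666667]
print(iou.mean())  # ~0.708 (the mean_iou value)
```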
def _auc(labels, predictions, weights=None, num_thresholds=200, metrics_collections=None, updates_collections=None, curve='ROC', name=None, summation_method='trapezoidal'): """Computes the approximate AUC via a Riemann sum. Modified version of tf.metrics.auc. Add support for AUC computation of the recall curve. """ with tf.variable_scope( name, 'auc', (labels, predictions, weights)): if curve != 'ROC' and curve != 'PR' and curve != 'R': raise ValueError('curve must be either ROC, PR or R, %s unknown' % (curve)) kepsilon = 1e-7 # to account for floating point imprecisions thresholds = [(i + 1) * 1.0 / (num_thresholds - 1) for i in range(num_thresholds - 2)] thresholds = [0.0 - kepsilon] + thresholds + [1.0 + kepsilon] values, update_ops = _confusion_matrix_at_thresholds( labels, predictions, thresholds, weights) # Add epsilons to avoid dividing by 0. epsilon = 1.0e-6 def compute_auc(tp, fn, tn, fp, name): """Computes the roc-auc or pr-auc based on confusion counts.""" rec = tf.div(tp + epsilon, tp + fn + epsilon) if curve == 'ROC': fp_rate = tf.div(fp, fp + tn + epsilon) x = fp_rate y = rec elif curve == 'R': # recall auc x = tf.linspace(1., 0., num_thresholds) y = rec else: # curve == 'PR'. prec = tf.div(tp + epsilon, tp + fp + epsilon) x = rec y = prec if summation_method == 'trapezoidal': return tf.reduce_sum( tf.multiply(x[:num_thresholds - 1] - x[1:], (y[:num_thresholds - 1] + y[1:]) / 2.), name=name) elif summation_method == 'minoring': return tf.reduce_sum( tf.multiply(x[:num_thresholds - 1] - x[1:], tf.minimum(y[:num_thresholds - 1], y[1:])), name=name) elif summation_method == 'majoring': return tf.reduce_sum( tf.multiply(x[:num_thresholds - 1] - x[1:], tf.maximum(y[:num_thresholds - 1], y[1:])), name=name) else: raise ValueError('Invalid summation_method: %s' % summation_method) # sum up the areas of all the trapeziums auc_value = compute_auc( values['tp'], values['fn'], values['tn'], values['fp'], 'value') update_op = compute_auc( update_ops['tp'], update_ops['fn'], update_ops['tn'], update_ops['fp'], 'update_op') if metrics_collections: ops.add_to_collections(metrics_collections, auc_value) if updates_collections: ops.add_to_collections(updates_collections, update_op) return auc_value, update_op
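The three `summation_method` options above differ only in how the strip height between adjacent thresholds is chosen; a small numpy illustration on a toy monotone curve:

```python
import numpy as np

# x decreases with the threshold index, so the widths (x[:-1] - x[1:]) are non-negative.
x = np.array([1.0, 0.6, 0.3, 0.0])
y = np.array([1.0, 0.9, 0.5, 0.0])
widths = x[:-1] - x[1:]

trapezoidal = np.sum(widths * (y[:-1] + y[1:]) / 2.0)   # ~0.665
minoring = np.sum(widths * np.minimum(y[:-1], y[1:]))   # ~0.51  (lower bound)
majoring = np.sum(widths * np.maximum(y[:-1], y[1:]))   # ~0.82  (upper bound)
print(trapezoidal, minoring, majoring)
```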
def _init_from_args(self, initial_value=None, trainable=True, collections=None, validate_shape=True, caching_device=None, name=None, dtype=None): """Creates a new variable from arguments. Args: initial_value: A `Tensor`, or Python object convertible to a `Tensor`. The initial value for the Variable. Must have a shape specified unless `validate_shape` is set to False. trainable: If `True`, the default, also adds the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as the default list of variables to use by the `Optimizer` classes. collections: List of graph collections keys. The new variable is added to these collections. Defaults to `[GraphKeys.VARIABLES]`. validate_shape: If `False`, allows the variable to be initialized with a value of unknown shape. If `True`, the default, the shape of `initial_value` must be known. caching_device: Optional device string or function describing where the Variable should be cached for reading. Defaults to the Variable's device. If not `None`, caches on another device. Typical use is to cache on the device where the Ops using the Variable reside, to deduplicate copying through `Switch` and other conditional statements. name: Optional name for the variable. Defaults to `'Variable'` and gets uniquified automatically. dtype: If set, initial_value will be converted to the given type. If None, either the datatype will be kept (if initial_value is a Tensor) or float32 will be used (if it is a Python object convertible to a Tensor). Raises: ValueError: If the initial value is not specified, or does not have a shape and `validate_shape` is `True`. """ if initial_value is None: raise ValueError("initial_value must be specified.") if collections is None: collections = [ops.GraphKeys.VARIABLES] if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ ops.GraphKeys.TRAINABLE_VARIABLES ] with ops.control_dependencies(None): with ops.op_scope([initial_value], name, "Variable") as name: self._initial_value = ops.convert_to_tensor( initial_value, name="initial_value", dtype=dtype) initial_value_shape = self._initial_value.get_shape() if validate_shape and not initial_value_shape.is_fully_defined( ): raise ValueError( "initial_value must have a shape specified: %s" % self._initial_value) shape_to_set = initial_value_shape if validate_shape else [] self._variable = state_ops.variable_op( shape_to_set, self._initial_value.dtype.base_dtype, set_shape=validate_shape, name=name) with ops.colocate_with(self._variable.op): self._initializer_op = state_ops.assign( self._variable, self._initial_value, validate_shape=validate_shape).op # TODO(vrv): Change this class to not take caching_device, but # to take the op to colocate the snapshot with, so we can use # colocation rather than devices. if caching_device is not None: with ops.device(caching_device): self._snapshot = array_ops.identity(self._variable, name="read") else: with ops.colocate_with(self._variable.op): self._snapshot = array_ops.identity(self._variable, name="read") ops.add_to_collections(collections, self) self._caching_device = caching_device self._save_slice_info = None
def _init_from_args(self, initial_value=None, trainable=True, collections=None, validate_shape=True, caching_device=None, name=None, dtype=None, expected_shape=None): """Creates a new variable from arguments. Args: initial_value: A `Tensor`, or Python object convertible to a `Tensor`, which is the initial value for the Variable. The initial value must have a shape specified unless `validate_shape` is set to False. Can also be a callable with no argument that returns the initial value when called. (Note that initializer functions from init_ops.py must first be bound to a shape before being used here.) trainable: If `True`, the default, also adds the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as the default list of variables to use by the `Optimizer` classes. collections: List of graph collections keys. The new variable is added to these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`. validate_shape: If `False`, allows the variable to be initialized with a value of unknown shape. If `True`, the default, the shape of `initial_value` must be known. caching_device: Optional device string or function describing where the Variable should be cached for reading. Defaults to the Variable's device. If not `None`, caches on another device. Typical use is to cache on the device where the Ops using the Variable reside, to deduplicate copying through `Switch` and other conditional statements. name: Optional name for the variable. Defaults to `'Variable'` and gets uniquified automatically. dtype: If set, initial_value will be converted to the given type. If None, either the datatype will be kept (if initial_value is a Tensor) or float32 will be used (if it is a Python object convertible to a Tensor). expected_shape: Deprecated. Ignored. Raises: ValueError: If the initial value is not specified, or does not have a shape and `validate_shape` is `True`. """ _ = expected_shape if initial_value is None: raise ValueError("initial_value must be specified.") init_from_fn = callable(initial_value) if collections is None: collections = [ops.GraphKeys.GLOBAL_VARIABLES] if not isinstance(collections, (list, tuple, set)): raise ValueError( "collections argument to Variable constructor must be a list, tuple, " "or set. Got %s of type %s" % (collections, type(collections))) if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES] with ops.control_dependencies(None): with ops.name_scope(name, "Variable", [] if init_from_fn else [initial_value]) as name: if init_from_fn: # Use attr_scope and device(None) to simulate the behavior of # colocate_with when the variable we want to colocate with doesn't # yet exist. true_name = ops._name_from_scope_name(name) attr = attr_value_pb2.AttrValue( list=attr_value_pb2.AttrValue.ListValue( s=[compat.as_bytes("loc:@%s" % true_name)])) # pylint: disable=protected-access with ops.get_default_graph()._attr_scope({"_class": attr}): with ops.name_scope("Initializer"), ops.device(None): self._initial_value = ops.convert_to_tensor( initial_value(), name="initial_value", dtype=dtype) shape = (self._initial_value.get_shape() if validate_shape else tensor_shape.unknown_shape()) self._variable = state_ops.variable_op_v2( shape, self._initial_value.dtype.base_dtype, name=name) # Or get the initial value from a Tensor or Python object. 
else: self._initial_value = ops.convert_to_tensor( initial_value, name="initial_value", dtype=dtype) shape = (self._initial_value.get_shape() if validate_shape else tensor_shape.unknown_shape()) # In this case, the variable op can't be created until after the # initial_value has been converted to a Tensor with a known type. self._variable = state_ops.variable_op_v2( shape, self._initial_value.dtype.base_dtype, name=name) # Manually overrides the variable's shape with the initial value's. if validate_shape: initial_value_shape = self._initial_value.get_shape() if not initial_value_shape.is_fully_defined(): raise ValueError("initial_value must have a shape specified: %s" % self._initial_value) # Assigns initial value. self._initializer_op = state_ops.assign( self._variable, self._initial_value, validate_shape=validate_shape).op # TODO(vrv): Change this class to not take caching_device, but # to take the op to colocate the snapshot with, so we can use # colocation rather than devices. if caching_device is not None: with ops.device(caching_device): self._snapshot = array_ops.identity(self._variable, name="read") else: with ops.colocate_with(self._variable.op): self._snapshot = array_ops.identity(self._variable, name="read") ops.add_to_collections(collections, self) self._caching_device = caching_device self._save_slice_info = None
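As the `init_from_fn` branch above notes, `initial_value` may also be a zero-argument callable whose result is only converted to a tensor inside the variable's own scope. A quick sketch of both forms with the public `tf.Variable` constructor:

```python
import tensorflow as tf

# Tensor initial value: shape and dtype are fixed immediately.
a = tf.Variable(tf.zeros([3, 3]), name="a")

# Callable initial value: the initializer tensor is created lazily
# (initializers from init_ops must be bound to a shape first, as the docstring says).
b = tf.Variable(lambda: tf.random_normal([3, 3], stddev=0.1), name="b")

print(a.get_shape())  # (3, 3)
print(b.get_shape())  # (3, 3)
```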
def batch_norm(inputs, decay=0.999, center=True, scale=False, epsilon=0.001, activation_fn=None, updates_collections=ops.GraphKeys.UPDATE_OPS, is_training=True, reuse=None, variables_collections=None, outputs_collections=None, scope=None): """Adds a Batch Normalization layer from http://arxiv.org/abs/1502.03167. "Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift" Sergey Ioffe, Christian Szegedy Can be used as a normalizer function for conv2d and fully_connected. Args: inputs: a tensor of size `[batch_size, height, width, channels]` or `[batch_size, channels]`. decay: decay for the moving average. center: If True, subtract `beta`. If False, `beta` is ignored. scale: If True, multiply by `gamma`. If False, `gamma` is not used. When the next layer is linear (also e.g. `nn.relu`), this can be disabled since the scaling can be done by the next layer. epsilon: small float added to variance to avoid dividing by zero. activation_fn: Optional activation function. updates_collections: collections to collect the update ops for computation. If None, a control dependency would be added to make sure the updates are computed. is_training: whether or not the layer is in training mode. In training mode it would accumulate the statistics of the moments into `moving_mean` and `moving_variance` using an exponential moving average with the given `decay`. When it is not in training mode then it would use the values of the `moving_mean` and the `moving_variance`. reuse: whether or not the layer and its variables should be reused. To be able to reuse the layer scope must be given. variables_collections: optional collections for the variables. outputs_collections: collections to add the outputs. scope: Optional scope for `variable_op_scope`. Returns: a tensor representing the output of the operation. """ with variable_scope.variable_op_scope([inputs], scope, 'BatchNorm', reuse=reuse) as sc: inputs_shape = inputs.get_shape() dtype = inputs.dtype.base_dtype axis = list(range(len(inputs_shape) - 1)) params_shape = inputs_shape[-1:] # Allocate parameters for the beta and gamma of the normalization. beta, gamma = None, None if center: beta_collections = utils.get_variable_collections( variables_collections, 'beta') beta = variables.model_variable( 'beta', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer, collections=beta_collections) if scale: gamma_collections = utils.get_variable_collections( variables_collections, 'gamma') gamma = variables.model_variable( 'gamma', shape=params_shape, dtype=dtype, initializer=init_ops.ones_initializer, collections=gamma_collections) # Create moving_mean and moving_variance variables and add them to the # appropiate collections. moving_mean_collections = utils.get_variable_collections( variables_collections, 'moving_mean') moving_mean = variables.model_variable( 'moving_mean', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer, trainable=False, collections=moving_mean_collections) moving_variance_collections = utils.get_variable_collections( variables_collections, 'moving_variance') moving_variance = variables.model_variable( 'moving_variance', shape=params_shape, dtype=dtype, initializer=init_ops.ones_initializer, trainable=False, collections=moving_variance_collections) if is_training: # Calculate the moments based on the individual batch. mean, variance = nn.moments(inputs, axis, shift=moving_mean) # Update the moving_mean and moving_variance moments. 
update_moving_mean = moving_averages.assign_moving_average( moving_mean, mean, decay) update_moving_variance = moving_averages.assign_moving_average( moving_variance, variance, decay) if updates_collections is None: # Make sure the updates are computed here. with ops.control_dependencies( [update_moving_mean, update_moving_variance]): outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) else: # Collect the updates to be computed later. ops.add_to_collections(updates_collections, update_moving_mean) ops.add_to_collections(updates_collections, update_moving_variance) outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) else: outputs = nn.batch_normalization(inputs, moving_mean, moving_variance, beta, gamma, epsilon) outputs.set_shape(inputs.get_shape()) if activation_fn: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
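Because the moving-average assignments above are only added to `updates_collections` by default, the training graph must run them explicitly or the statistics never move. A self-contained sketch of the usual pattern, using `tf.layers.batch_normalization` as a stand-in for the layer defined above:

```python
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 8])
y = tf.placeholder(tf.float32, [None, 1])

h = tf.layers.batch_normalization(x, training=True)  # collects its updates in UPDATE_OPS
logits = tf.layers.dense(h, 1)
loss = tf.losses.mean_squared_error(y, logits)

# Make the train op depend on the collected update ops.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
```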
def _init_from_args(self, initial_value=None, initializer=None, trainable=True, collections=None, caching_device=None, name=None, dtype=None, constraint=None, synchronization=None, aggregation=None, distribute_strategy=None, invalid_key=-1): """Creates a variable. Args: initial_value: A `Tensor`, or Python object convertible to a `Tensor`, which is the initial value for the EmbeddingVariable. Can also be a callable with no argument that returns the initial value when called. (Note that initializer functions from init_ops.py must first be bound to a shape before being used here.) trainable: If `True`, the default, also adds the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as the default list of variables to use by the `Optimizer` classes. collections: List of graph collections keys. The new variable is added to these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`. name: Optional name for the variable. Defaults to `'EmbeddingVariable'` and gets uniquified automatically. dtype: If set, initial_value will be converted to the given type. If None, either the datatype will be kept (if initial_value is a Tensor) or float32 will be used (if it is a Python object convertible to a Tensor). constraint: An optional projection function to be applied to the variable after being updated by an `Optimizer` (e.g. used to implement norm constraints or value constraints for layer weights). The function must take as input the unprojected Tensor representing the value of the variable and return the Tensor for the projected value (which must have the same shape). Constraints are not safe to use when doing asynchronous distributed training. @compatibility(eager) When Eager Execution is enabled, variables are never added to collections. It is not implicitly added to the GLOBAL_VARIABLES or TRAINABLE_VARIABLES collections, and the `collections` argument is ignored. @end_compatibility """ if initial_value is None: raise ValueError("initial_value must be specified.") init_from_fn = callable(initial_value) if collections is None: collections = [ops.GraphKeys.GLOBAL_VARIABLES] if not isinstance(collections, (list, tuple, set)): raise ValueError( "collections argument to EmbeddingVariable constructor must be a list, tuple, " "or set. Got %s of type %s" % (collections, type(collections))) if constraint is not None and not callable(constraint): raise ValueError("The `constraint` argument must be a callable.") self._initializer = initializer if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ ops.GraphKeys.TRAINABLE_VARIABLES ] #self._save_slice_info = None with ops.init_scope(): self._in_graph_mode = not context.executing_eagerly() with ops.name_scope(name, "EmbeddingVariable", [] if init_from_fn else [initial_value], skip_on_eager=False) as name: # pylint: disable=protected-access self._invalid_key = invalid_key self._invalid_key_type = ops.convert_to_tensor( invalid_key, name="invalid_key", preferred_dtype=dtypes.int64).dtype.base_dtype handle_name = ops.name_from_scope_name(name) if self._in_graph_mode: shared_name = handle_name unique_id = shared_name else: # When in eager mode use a uid for the shared_name, to prevent # accidental sharing. unique_id = "%s_%d" % (handle_name, ops.uid()) shared_name = None # Never shared # Use attr_scope and device(None) to simulate the behavior of # colocate_with when the variable we want to colocate with doesn't # yet exist. 
device_context_manager = (ops.device if self._in_graph_mode else ops.NullContextmanager) attr = attr_value_pb2.AttrValue( list=attr_value_pb2.AttrValue.ListValue( s=[compat.as_bytes("loc:@%s" % handle_name)])) with ops.get_default_graph()._attr_scope({"_class": attr}): with ops.name_scope("Initializer"), device_context_manager( None): if init_from_fn: initial_value = initial_value() if isinstance(initial_value, trackable.CheckpointInitialValue): self._maybe_initialize_trackable() self._update_uid = initial_value.checkpoint_position.restore_uid initial_value = initial_value.wrapped_value initial_value = ops.convert_to_tensor( initial_value, name="initial_value", dtype=dtype) shape = initial_value.shape handle = self._embedding_variable_handle( shape=initial_value.get_shape(), dtype=initial_value.dtype.base_dtype, shared_name=shared_name, name=name, graph_mode=self._in_graph_mode) # pylint: disable=protected-access if (self._in_graph_mode and initial_value is not None and initial_value.op._get_control_flow_context() is not None): raise ValueError( "Initializer for variable %s is from inside a control-flow " "construct, such as a loop or conditional. When creating a " "variable inside a loop or conditional, use a lambda as the " "initializer." % name) # pylint: enable=protected-access dtype = initial_value.dtype.base_dtype if self._in_graph_mode: with ops.name_scope("IsInitialized"): is_initialized_op = (gen_ev_ops.ev_is_initialized_op( handle, Tkeys=self._invalid_key_type)) if initial_value is not None: # pylint: disable=g-backslash-continuation with ops.name_scope("Initialize") as n, \ ops.colocate_with(None, ignore_existing=True), \ ops.device(handle.device): # pylint: disable=protected-access initializer_op = (gen_ev_ops.initialize_ev_op( handle, variables. _try_guard_against_uninitialized_dependencies( name, initial_value), ops.convert_to_tensor( invalid_key, preferred_dtype=dtypes.int64), shape=initial_value.get_shape(), name=n)) cached_value = None graph_element = None else: gen_ev_ops.initialize_ev_op( handle, initial_value, ops.convert_to_tensor(invalid_key, preferred_dtype=dtypes.int64), shape=initial_value.get_shape()) is_initialized_op = None initializer_op = None graph_element = None cached_value = None if not context.executing_eagerly(): # Eager variables are only added to collections if they are part of an # eager variable store (otherwise in an interactive session they would # hog memory and cause OOM). This is done in ops/variable_scope.py. ops.add_to_collections(collections, self) elif ops.GraphKeys.GLOBAL_STEP in collections: ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, self) initial_value = initial_value if self._in_graph_mode else None super(resource_variable_ops.ResourceVariable, self).__init__(trainable=trainable, shape=shape, dtype=dtype, handle=handle, synchronization=synchronization, constraint=constraint, aggregation=aggregation, distribute_strategy=distribute_strategy, name=name, unique_id=unique_id, handle_name=handle_name, graph_element=graph_element, initial_value=initial_value, initializer_op=initializer_op, is_initialized_op=is_initialized_op, cached_value=cached_value, caching_device=caching_device)
def batch_norm(inputs, decay=0.999, center=True, scale=False, epsilon=0.001, updates_collections=ops.GraphKeys.UPDATE_OPS, is_training=True, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, scope=None): """Code modification of tensorflow/contrib/layers/python/layers/layers.py """ with variable_scope.variable_op_scope([inputs], scope, 'BatchNorm', reuse=reuse) as sc: inputs = ops.convert_to_tensor(inputs) inputs_shape = inputs.get_shape() inputs_rank = inputs_shape.ndims if inputs_rank is None: raise ValueError('Inputs %s has undefined rank.' % inputs.name) dtype = inputs.dtype.base_dtype axis = list(range(inputs_rank - 1)) params_shape = inputs_shape[-1:] if not params_shape.is_fully_defined(): raise ValueError('Inputs %s has undefined last dimension %s.' % ( inputs.name, params_shape)) # Allocate parameters for the beta and gamma of the normalization. beta, gamma = None, None if center: beta_collections = utils.get_variable_collections(variables_collections, 'beta') beta = variables.model_variable('beta', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer, collections=beta_collections, trainable=trainable) if scale: gamma_collections = utils.get_variable_collections(variables_collections, 'gamma') gamma = variables.model_variable('gamma', shape=params_shape, dtype=dtype, initializer=init_ops.ones_initializer, collections=gamma_collections, trainable=trainable) # Create moving_mean and moving_variance variables and add them to the # appropiate collections. moving_mean_collections = utils.get_variable_collections( variables_collections, 'moving_mean') moving_mean = variables.model_variable( 'moving_mean', shape=params_shape, dtype=dtype, initializer=init_ops.zeros_initializer, trainable=False, collections=moving_mean_collections) moving_variance_collections = utils.get_variable_collections( variables_collections, 'moving_variance') moving_variance = variables.model_variable( 'moving_variance', shape=params_shape, dtype=dtype, initializer=init_ops.ones_initializer, trainable=False, collections=moving_variance_collections) # Calculate the moments based on the individual batch. mean, variance = nn.moments(inputs, axis, shift=moving_mean) # Update the moving_mean and moving_variance moments. update_moving_mean = moving_averages.assign_moving_average( moving_mean, mean, decay) update_moving_variance = moving_averages.assign_moving_average( moving_variance, variance, decay) if updates_collections is None: # Make sure the updates are computed here. with ops.control_dependencies([update_moving_mean, update_moving_variance]): outputs = nn.batch_normalization( inputs, mean, variance, beta, gamma, epsilon) else: # Collect the updates to be computed later. ops.add_to_collections(updates_collections, update_moving_mean) ops.add_to_collections(updates_collections, update_moving_variance) outputs = nn.batch_normalization( inputs, mean, variance, beta, gamma, epsilon) test_outputs = nn.batch_normalization( inputs, moving_mean, moving_variance, beta, gamma, epsilon) outputs = tf.cond(is_training, lambda: outputs, lambda: test_outputs) outputs.set_shape(inputs_shape) return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
def batch_norm_backbone(inputs, decay=0.999, center=True, scale=False, epsilon=0.001, activation_fn=None, param_initializers=None, param_regularizers=None, updates_collections=ops.GraphKeys.UPDATE_OPS, is_training=True, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, batch_weights=None, fused=None, data_format=DATA_FORMAT_NHWC, zero_debias_moving_mean=False, scope=None, renorm=False, renorm_clipping=None, renorm_decay=0.99, adjustment=None, tower_config=None): """Adds a Batch Normalization layer from http://arxiv.org/abs/1502.03167. "Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift" Sergey Ioffe, Christian Szegedy Can be used as a normalizer function for conv2d and fully_connected. The normalization is over all but the last dimension if `data_format` is `NHWC` and all but the second dimension if `data_format` is `NCHW`. In case of a 2D tensor this corresponds to the batch dimension, while in case of a 4D tensor this corresponds to the batch and space dimensions. Note: when training, the moving_mean and moving_variance need to be updated. By default the update ops are placed in `tf.GraphKeys.UPDATE_OPS`, so they need to be added as a dependency to the `train_op`. For example: ```python update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): train_op = optimizer.minimize(loss) ``` One can set updates_collections=None to force the updates in place, but that can have a speed penalty, especially in distributed settings. Args: inputs: A tensor with 2 or more dimensions, where the first dimension has `batch_size`. The normalization is over all but the last dimension if `data_format` is `NHWC` and the second dimension if `data_format` is `NCHW`. decay: Decay for the moving average. Reasonable values for `decay` are close to 1.0, typically in the multiple-nines range: 0.999, 0.99, 0.9, etc. Lower `decay` value (recommend trying `decay`=0.9) if model experiences reasonably good training performance but poor validation and/or test performance. Try zero_debias_moving_mean=True for improved stability. center: If True, add offset of `beta` to normalized tensor. If False, `beta` is ignored. scale: If True, multiply by `gamma`. If False, `gamma` is not used. When the next layer is linear (also e.g. `nn.relu`), this can be disabled since the scaling can be done by the next layer. epsilon: Small float added to variance to avoid dividing by zero. activation_fn: Activation function, default set to None to skip it and maintain a linear activation. param_initializers: Optional initializers for beta, gamma, moving mean and moving variance. param_regularizers: Optional regularizer for beta and gamma. updates_collections: Collections to collect the update ops for computation. The updates_ops need to be executed with the train_op. If None, a control dependency would be added to make sure the updates are computed in place. is_training: Whether or not the layer is in training mode. In training mode it would accumulate the statistics of the moments into `moving_mean` and `moving_variance` using an exponential moving average with the given `decay`. When it is not in training mode then it would use the values of the `moving_mean` and the `moving_variance`. reuse: Whether or not the layer and its variables should be reused. To be able to reuse the layer scope must be given. variables_collections: Optional collections for the variables. outputs_collections: Collections to add the outputs. 
trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). batch_weights: An optional tensor of shape `[batch_size]`, containing a frequency weight for each batch item. If present, then the batch normalization uses weighted mean and variance. (This can be used to correct for bias in training example selection.) fused: if `None` or `True`, use a faster, fused implementation if possible. If `False`, use the system recommended implementation. data_format: A string. `NHWC` (default) and `NCHW` are supported. zero_debias_moving_mean: Use zero_debias for moving_mean. It creates a new pair of variables 'moving_mean/biased' and 'moving_mean/local_step'. scope: Optional scope for `variable_scope`. renorm: Whether to use Batch Renormalization (https://arxiv.org/abs/1702.03275). This adds extra variables during training. The inference is the same for either value of this parameter. renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to scalar `Tensors` used to clip the renorm correction. The correction `(r, d)` is used as `corrected_value = normalized_value * r + d`, with `r` clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin, dmax are set to inf, 0, inf, respectively. renorm_decay: Momentum used to update the moving means and standard deviations with renorm. Unlike `momentum`, this affects training and should be neither too small (which would add noise) nor too large (which would give stale estimates). Note that `decay` is still applied to get the means and variances for inference. adjustment: A function taking the `Tensor` containing the (dynamic) shape of the input tensor and returning a pair (scale, bias) to apply to the normalized values (before gamma and beta), only during training. For example, `adjustment = lambda shape: ( tf.random_uniform(shape[-1:], 0.93, 1.07), tf.random_uniform(shape[-1:], -0.1, 0.1))` will scale the normalized value by up to 7% up or down, then shift the result by up to 0.1 (with independent scaling and bias for each feature but shared across all examples), and finally apply gamma and/or beta. If `None`, no adjustment is applied. Returns: A `Tensor` representing the output of the operation. Raises: ValueError: If `data_format` is neither `NHWC` nor `NCHW`. ValueError: If the rank of `inputs` is undefined. ValueError: If rank or channels dimension of `inputs` is undefined. """ # if fused is None: # fused = True # Only use _fused_batch_norm if all of the following three # conditions are true: # (1) fused is set True; # (2) it is possible to use (currently it doesn't support batch weights, # renorm, and the case when rank is neither 2 nor 4); # (3) it is used with zero_debias_moving_mean, or an input shape of rank 2, # or non-default updates_collections (not implemented in # normalization_layers.BatchNormalization yet); otherwise use the fused # implementation in normalization_layers.BatchNormalization. 
# inputs = ops.convert_to_tensor(inputs) # rank = inputs.get_shape().ndims # possible_to_fuse = ( # batch_weights is None and not renorm and rank in [2, 4] and # adjustment is None) # if fused and possible_to_fuse and ( # zero_debias_moving_mean or rank == 2 or # updates_collections is not ops.GraphKeys.UPDATE_OPS): # return _fused_batch_norm( # inputs, # decay=decay, # center=center, # scale=scale, # epsilon=epsilon, # activation_fn=activation_fn, # param_initializers=param_initializers, # param_regularizers=param_regularizers, # updates_collections=updates_collections, # is_training=is_training, # reuse=reuse, # variables_collections=variables_collections, # outputs_collections=outputs_collections, # trainable=trainable, # data_format=data_format, # zero_debias_moving_mean=zero_debias_moving_mean, # scope=scope) if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC): raise ValueError('data_format has to be either NCHW or NHWC.') layer_variable_getter = _build_variable_getter() with variable_scope.variable_scope( scope, 'BatchNorm', [inputs], reuse=reuse, custom_getter=layer_variable_getter) as sc: inputs = ops.convert_to_tensor(inputs) # # Determine whether we can use the core layer class. # if (batch_weights is None and # updates_collections is ops.GraphKeys.UPDATE_OPS and # not zero_debias_moving_mean): # print("F**K !!!!") # # Use the core layer class. # axis = 1 if data_format == DATA_FORMAT_NCHW else -1 # if not param_initializers: # param_initializers = {} # beta_initializer = param_initializers.get('beta', # init_ops.zeros_initializer()) # gamma_initializer = param_initializers.get('gamma', # init_ops.ones_initializer()) # moving_mean_initializer = param_initializers.get( # 'moving_mean', init_ops.zeros_initializer()) # moving_variance_initializer = param_initializers.get( # 'moving_variance', init_ops.ones_initializer()) # if not param_regularizers: # param_regularizers = {} # beta_regularizer = param_regularizers.get('beta') # gamma_regularizer = param_regularizers.get('gamma') # layer = normalization_layers.BatchNormalization( # axis=axis, # momentum=decay, # epsilon=epsilon, # center=center, # scale=scale, # beta_initializer=beta_initializer, # gamma_initializer=gamma_initializer, # moving_mean_initializer=moving_mean_initializer, # moving_variance_initializer=moving_variance_initializer, # beta_regularizer=beta_regularizer, # gamma_regularizer=gamma_regularizer, # trainable=trainable, # renorm=renorm, # renorm_clipping=renorm_clipping, # renorm_momentum=renorm_decay, # adjustment=adjustment, # name=sc.name, # _scope=sc, # _reuse=reuse, # fused=fused) # outputs = layer.apply(inputs, training=is_training) # # # Add variables to collections. # _add_variable_to_collections(layer.moving_mean, variables_collections, # 'moving_mean') # _add_variable_to_collections(layer.moving_variance, variables_collections, # 'moving_variance') # if layer.beta is not None: # _add_variable_to_collections(layer.beta, variables_collections, 'beta') # if layer.gamma is not None: # _add_variable_to_collections(layer.gamma, variables_collections, # 'gamma') # # if activation_fn is not None: # outputs = activation_fn(outputs) # return utils.collect_named_outputs(outputs_collections, sc.name, outputs) # Not supported by layer class: batch_weights argument, # and custom updates_collections. In that case, use the legacy BN # implementation. # Custom updates collections are not supported because the update logic # is different in this case, in particular w.r.t. "forced updates" and # update op reuse. 
if renorm: raise ValueError('renorm is not supported with batch_weights, ' 'updates_collections or zero_debias_moving_mean') inputs_shape = inputs.get_shape() inputs_rank = inputs_shape.ndims if inputs_rank is None: raise ValueError('Inputs %s has undefined rank.' % inputs.name) dtype = inputs.dtype.base_dtype if batch_weights is not None: batch_weights = ops.convert_to_tensor(batch_weights) inputs_shape[0:1].assert_is_compatible_with(batch_weights.get_shape()) # Reshape batch weight values so they broadcast across inputs. nshape = [-1] + [1 for _ in range(inputs_rank - 1)] batch_weights = array_ops.reshape(batch_weights, nshape) if data_format == DATA_FORMAT_NCHW: moments_axes = [0] + list(range(2, inputs_rank)) params_shape = inputs_shape[1:2] # For NCHW format, rather than relying on implicit broadcasting, we # explicitly reshape the params to params_shape_broadcast when computing # the moments and the batch normalization. params_shape_broadcast = list( [1, inputs_shape[1].value] + [1 for _ in range(2, inputs_rank)]) else: moments_axes = list(range(inputs_rank - 1)) params_shape = inputs_shape[-1:] params_shape_broadcast = None if not params_shape.is_fully_defined(): raise ValueError('Inputs %s has undefined channels dimension %s.' % (inputs.name, params_shape)) # Allocate parameters for the beta and gamma of the normalization. beta, gamma = None, None if not param_initializers: param_initializers = {} if center: beta_collections = utils.get_variable_collections(variables_collections, 'beta') beta_initializer = param_initializers.get('beta', init_ops.zeros_initializer()) beta = variables.model_variable( 'beta', shape=params_shape, dtype=dtype, initializer=beta_initializer, collections=beta_collections, trainable=trainable) if scale: gamma_collections = utils.get_variable_collections( variables_collections, 'gamma') gamma_initializer = param_initializers.get('gamma', init_ops.ones_initializer()) gamma = variables.model_variable( 'gamma', shape=params_shape, dtype=dtype, initializer=gamma_initializer, collections=gamma_collections, trainable=trainable) # Create moving_mean and moving_variance variables and add them to the # appropriate collections. We disable variable partitioning while creating # them, because assign_moving_average is not yet supported for partitioned # variables (this needs to be handled carefully, as it may break # the checkpoint backward compatibility). with variable_scope.variable_scope( variable_scope.get_variable_scope()) as local_scope: local_scope.set_partitioner(None) moving_mean_collections = utils.get_variable_collections( variables_collections, 'moving_mean') moving_mean_initializer = param_initializers.get( 'moving_mean', init_ops.zeros_initializer()) moving_mean = variables.model_variable( 'moving_mean', shape=params_shape, dtype=dtype, initializer=moving_mean_initializer, trainable=False, collections=moving_mean_collections) moving_variance_collections = utils.get_variable_collections( variables_collections, 'moving_variance') moving_variance_initializer = param_initializers.get( 'moving_variance', init_ops.ones_initializer()) moving_variance = variables.model_variable( 'moving_variance', shape=params_shape, dtype=dtype, initializer=moving_variance_initializer, trainable=False, collections=moving_variance_collections) # If `is_training` doesn't have a constant value, because it is a `Tensor`, # a `Variable` or `Placeholder` then is_training_value will be None and # `needs_moments` will be true. 
is_training_value = utils.constant_value(is_training) need_moments = is_training_value is None or is_training_value if need_moments: # Calculate the moments based on the individual batch. if batch_weights is None: if data_format == DATA_FORMAT_NCHW: mean, variance = moments(inputs, moments_axes, tower_config=tower_config, keep_dims=True) mean = array_ops.reshape(mean, [-1]) variance = array_ops.reshape(variance, [-1]) else: mean, variance = moments(inputs, moments_axes, tower_config=tower_config) else: if data_format == DATA_FORMAT_NCHW: mean, variance = weighted_moments( inputs, moments_axes, batch_weights, tower_config, keep_dims=True) mean = array_ops.reshape(mean, [-1]) variance = array_ops.reshape(variance, [-1]) else: mean, variance = weighted_moments(inputs, moments_axes, batch_weights, tower_config=tower_config) moving_vars_fn = lambda: (moving_mean, moving_variance) if updates_collections is None: def _force_updates(): """Internal function forces updates moving_vars if is_training.""" update_moving_mean = moving_averages.assign_moving_average( moving_mean, mean, decay, zero_debias=zero_debias_moving_mean) update_moving_variance = moving_averages.assign_moving_average( moving_variance, variance, decay, zero_debias=False) with ops.control_dependencies( [update_moving_mean, update_moving_variance]): return array_ops.identity(mean), array_ops.identity(variance) mean, variance = utils.smart_cond(is_training, _force_updates, moving_vars_fn) else: def _delay_updates(): """Internal function that delay updates moving_vars if is_training.""" update_moving_mean = moving_averages.assign_moving_average( moving_mean, mean, decay, zero_debias=zero_debias_moving_mean) update_moving_variance = moving_averages.assign_moving_average( moving_variance, variance, decay, zero_debias=False) return update_moving_mean, update_moving_variance update_mean, update_variance = utils.smart_cond( is_training, _delay_updates, moving_vars_fn) ops.add_to_collections(updates_collections, update_mean) ops.add_to_collections(updates_collections, update_variance) # Use computed moments during training and moving_vars otherwise. vars_fn = lambda: (mean, variance) mean, variance = utils.smart_cond(is_training, vars_fn, moving_vars_fn) else: mean, variance = moving_mean, moving_variance if data_format == DATA_FORMAT_NCHW: mean = array_ops.reshape(mean, params_shape_broadcast) variance = array_ops.reshape(variance, params_shape_broadcast) if beta is not None: beta = array_ops.reshape(beta, params_shape_broadcast) if gamma is not None: gamma = array_ops.reshape(gamma, params_shape_broadcast) # Compute batch_normalization. outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) outputs.set_shape(inputs_shape) if activation_fn is not None: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
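`updates_collections` accepts any collection key, not just `GraphKeys.UPDATE_OPS`; the delayed update ops are then retrieved from that key. A small sketch assuming the stock `tf.contrib.layers.batch_norm` (the backbone variant above adds the `tower_config`-aware moments on top of it):

```python
import tensorflow as tf

MY_BN_UPDATES = "my_bn_updates"  # hypothetical custom collection key

x = tf.placeholder(tf.float32, [None, 8, 8, 16])
_ = tf.contrib.layers.batch_norm(x, is_training=True,
                                 updates_collections=MY_BN_UPDATES)

# Two delayed assignments were collected: moving_mean and moving_variance.
bn_updates = tf.get_collection(MY_BN_UPDATES)
print(len(bn_updates))  # 2
```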
def streaming_tp_fp_arrays(num_gbboxes, tp, fp, scores, remove_zero_scores=True, metrics_collections=None, updates_collections=None, name=None): """Streaming computation of True and False Positive arrays. This metric also keeps track of scores and the number of groundtruth objects. """ # Input dictionaries: dict outputs as streaming metrics. if isinstance(scores, dict) or isinstance(fp, dict): d_values = {} d_update_ops = {} for c in num_gbboxes.keys(): scope = 'streaming_tp_fp_%s' % c v, up = streaming_tp_fp_arrays(num_gbboxes[c], tp[c], fp[c], scores[c], remove_zero_scores, metrics_collections, updates_collections, name=scope) d_values[c] = v d_update_ops[c] = up return d_values, d_update_ops # Input Tensors... with variable_scope.variable_scope(name, 'streaming_tp_fp', [num_gbboxes, tp, fp, scores]): num_gbboxes = math_ops.to_int64(num_gbboxes) scores = math_ops.to_float(scores) stype = tf.bool tp = tf.cast(tp, stype) fp = tf.cast(fp, stype) # Reshape TP and FP tensors and clean away 0 class values. scores = tf.reshape(scores, [-1]) tp = tf.reshape(tp, [-1]) fp = tf.reshape(fp, [-1]) # Remove entries where TP and FP are both false. mask = tf.logical_or(tp, fp) if remove_zero_scores: rm_threshold = 1e-4 mask = tf.logical_and(mask, tf.greater(scores, rm_threshold)) scores = tf.boolean_mask(scores, mask) tp = tf.boolean_mask(tp, mask) fp = tf.boolean_mask(fp, mask) ''' ftype = tf.float32 tp = tf.cast(tp, ftype) fp = tf.cast(fp, ftype) num_ = tf.cast(num_gbboxes, ftype) tp = tf.div(tf.reduce_sum(tp), tf.reduce_sum(tp) + tf.reduce_sum(fp)+ 0.0001) fp = tf.div(tf.reduce_sum(tp), tf.reduce_sum(num_)+0.0001) tp = tf.maximum(tp, 0.001) fp = tf.maximum(fp, 0.001) ''' # Local variables accumulating information over batches. v_nobjects = _create_local('v_num_gbboxes', shape=[], dtype=tf.int64) v_ndetections = _create_local('v_num_detections', shape=[], dtype=tf.int32) v_scores = _create_local('v_scores', shape=[ 0, ]) v_tp = _create_local('v_tp', shape=[ 0, ], dtype=stype) v_fp = _create_local('v_fp', shape=[ 0, ], dtype=stype) #v_tp = _create_local('v_tp', shape=[0,], dtype=ftype) #v_fp = _create_local('v_fp', shape=[0,], dtype=ftype) # Update operations. nobjects_op = state_ops.assign_add(v_nobjects, tf.reduce_sum(num_gbboxes)) ndetections_op = state_ops.assign_add( v_ndetections, tf.size(scores, out_type=tf.int32)) scores_op = state_ops.assign(v_scores, tf.concat([v_scores, scores], axis=0), validate_shape=False) tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp], axis=0), validate_shape=False) fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp], axis=0), validate_shape=False) # Value and update ops. val = (v_nobjects, v_ndetections, v_tp, v_fp, v_scores) with ops.control_dependencies( [nobjects_op, ndetections_op, scores_op, tp_op, fp_op]): update_op = (nobjects_op, ndetections_op, tp_op, fp_op, scores_op) if metrics_collections: ops.add_to_collections(metrics_collections, val) if updates_collections: ops.add_to_collections(updates_collections, update_op) return val, update_op
def collect_named_outputs(collections, alias, outputs): """If `collections` is given, tags `outputs` with `alias` and adds it to those collections; returns `outputs` either way.""" if collections: append_tensor_alias(outputs, alias) ops.add_to_collections(collections, outputs) return outputs
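This matches the contrib `utils.collect_named_outputs` helper: the alias is recorded on the tensor itself and the tensor is added to the collections, so tagged outputs can be found either way. A usage sketch assuming `tf.contrib.layers` is available:

```python
import tensorflow as tf
from tensorflow.contrib.layers.python.layers import utils

x = tf.placeholder(tf.float32, [None, 3])
out = utils.collect_named_outputs("end_points", "block1/output", tf.identity(x))

print(tf.get_collection("end_points"))  # [<tf.Tensor 'Identity:0' ...>]
print(out.aliases)                      # ['block1/output']
```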
def _init_from_args(self, initial_value=None, trainable=True, collections=None, validate_shape=True, caching_device=None, name=None, dtype=None, constraint=None): """Creates a variable. Args: initial_value: A `Tensor`, or Python object convertible to a `Tensor`, which is the initial value for the Variable. The initial value must have a shape specified unless `validate_shape` is set to False. Can also be a callable with no argument that returns the initial value when called. (Note that initializer functions from init_ops.py must first be bound to a shape before being used here.) trainable: If `True`, the default, also adds the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as the default list of variables to use by the `Optimizer` classes. collections: List of graph collections keys. The new variable is added to these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`. validate_shape: Ignored. Provided for compatibility with tf.Variable. caching_device: Optional device string or function describing where the Variable should be cached for reading. Defaults to the Variable's device. If not `None`, caches on another device. Typical use is to cache on the device where the Ops using the Variable reside, to deduplicate copying through `Switch` and other conditional statements. name: Optional name for the variable. Defaults to `'Variable'` and gets uniquified automatically. dtype: If set, initial_value will be converted to the given type. If None, either the datatype will be kept (if initial_value is a Tensor) or float32 will be used (if it is a Python object convertible to a Tensor). constraint: An optional projection function to be applied to the variable after being updated by an `Optimizer` (e.g. used to implement norm constraints or value constraints for layer weights). The function must take as input the unprojected Tensor representing the value of the variable and return the Tensor for the projected value (which must have the same shape). Constraints are not safe to use when doing asynchronous distributed training. Raises: ValueError: If the initial value is not specified, or does not have a shape and `validate_shape` is `True`. """ if initial_value is None: raise ValueError("initial_value must be specified.") init_from_fn = callable(initial_value) if collections is None: collections = [ops.GraphKeys.GLOBAL_VARIABLES] if not isinstance(collections, (list, tuple, set)): raise ValueError( "collections argument to Variable constructor must be a list, tuple, " "or set. Got %s of type %s" % (collections, type(collections))) if constraint is not None and not callable(constraint): raise ValueError("The `constraint` argument must be a callable.") self._trainable = trainable if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES] self._save_slice_info = None self._in_graph_mode = context.in_graph_mode() with ops.control_dependencies(None): with ops.name_scope(name, "Variable", [] if init_from_fn else [initial_value]) as name: # pylint: disable=protected-access handle_name = ops._name_from_scope_name(name) if init_from_fn: # Use attr_scope and device(None) to simulate the behavior of # colocate_with when the variable we want to colocate with doesn't # yet exist. 
if self._in_graph_mode: attr = attr_value_pb2.AttrValue( list=attr_value_pb2.AttrValue.ListValue( s=[compat.as_bytes("loc:@%s" % handle_name)])) with ops.get_default_graph()._attr_scope({"_class": attr}): with ops.name_scope("Initializer"), ops.device(None): initial_value = ops.convert_to_tensor( initial_value(), name="initial_value", dtype=dtype) self._handle = _eager_safe_variable_handle( shape=initial_value.get_shape(), dtype=initial_value.dtype.base_dtype, shared_name=handle_name, name=name, graph_mode=self._in_graph_mode) self._handle_device = ( self._handle.device if self._in_graph_mode else context.get_default_context().device_name) else: initial_value = initial_value() with ops.name_scope("Initializer"): initial_value = ops.convert_to_tensor( initial_value, name="initial_value", dtype=dtype) self._handle = _eager_safe_variable_handle( shape=initial_value.get_shape(), dtype=initial_value.dtype.base_dtype, shared_name=handle_name, name=name, graph_mode=False, container="") self._handle_device = ( self._handle.device if self._in_graph_mode else context.get_default_context().device_name) # pylint: enable=protected-access # Or get the initial value from a Tensor or Python object. else: with ops.name_scope("Initializer"): initial_value = ops.convert_to_tensor( initial_value, name="initial_value", dtype=dtype) # pylint: disable=protected-access if (self._in_graph_mode and initial_value is not None and initial_value.op._get_control_flow_context() is not None): raise ValueError( "Initializer for variable %s is from inside a control-flow " "construct, such as a loop or conditional. When creating a " "variable inside a loop or conditional, use a lambda as the " "initializer." % name) # pylint: enable=protected-access self._handle = _eager_safe_variable_handle( shape=initial_value.get_shape(), dtype=initial_value.dtype.base_dtype, shared_name=handle_name, name=name, graph_mode=self._in_graph_mode, container="") self._handle_device = (self._handle.device if self._in_graph_mode else context.get_default_context().device_name) self._initial_value = initial_value if self._in_graph_mode else None self._handle_name = handle_name + ":0" self._dtype = initial_value.dtype.base_dtype self._constraint = constraint if self._in_graph_mode: with ops.name_scope("IsInitialized"): self._is_initialized_op = ( gen_resource_variable_ops.var_is_initialized_op(self._handle)) if initial_value is not None: with ops.name_scope("Assign") as n, ops.colocate_with(self._handle): self._initializer_op = ( gen_resource_variable_ops.assign_variable_op( self._handle, self._build_initializer_expr(initial_value), name=n)) with ops.name_scope("Read"), ops.colocate_with(self._handle): # Manually assign reads to the handle's device to avoid log # messages. with ops.device(self._handle_device): value = self._read_variable_op() self._graph_element = value if caching_device is not None: # Variables may be created in a tf.device() or ops.colocate_with() # context. At the same time, users would expect caching device to # be independent of this context, and/or would not expect the # current device context to be merged with the caching device # spec. Therefore we reset the colocation stack before creating # the cached value. Note that resetting the colocation stack will # also reset the device stack. 
with ops.colocate_with(None, ignore_existing=True): with ops.device(caching_device): self._cached_value = array_ops.identity(value) else: self._cached_value = None else: gen_resource_variable_ops.assign_variable_op(self._handle, initial_value) self._is_initialized_op = None self._initializer_op = None self._graph_element = None if caching_device: with ops.device(caching_device): self._cached_value = self._read_variable_op() else: self._cached_value = None ops.add_to_collections(collections, self)
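# Sketch of the collection bookkeeping done at the end of _init_from_args
# above (TF 1.x graph mode): a trainable variable defaults into
# GLOBAL_VARIABLES and is additionally placed in TRAINABLE_VARIABLES, which
# is the list optimizers read by default; non-trainable variables are not.
import tensorflow as tf

with tf.Graph().as_default():
    w = tf.Variable(tf.zeros([2, 2]), trainable=True, name='w')
    step = tf.Variable(0, trainable=False, name='step')

    global_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    assert w in global_vars and step in global_vars
    assert w in trainable_vars and step not in trainable_vars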
def _init_from_args( self, initial_value=None, trainable=True, collections=None, validate_shape=True, caching_device=None, name=None, dtype=None, expected_shape=None, ): """Creates a new variable from arguments. Args: initial_value: A `Tensor`, or Python object convertible to a `Tensor`, which is the initial value for the Variable. The initial value must have a shape specified unless `validate_shape` is set to False. Can also be a callable with no argument that returns the initial value when called. In that case, `dtype` must be specified. (Note that initializer functions from init_ops.py must first be bound to a shape before being used here.) trainable: If `True`, the default, also adds the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as the default list of variables to use by the `Optimizer` classes. collections: List of graph collections keys. The new variable is added to these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`. validate_shape: If `False`, allows the variable to be initialized with a value of unknown shape. If `True`, the default, the shape of `initial_value` must be known. caching_device: Optional device string or function describing where the Variable should be cached for reading. Defaults to the Variable's device. If not `None`, caches on another device. Typical use is to cache on the device where the Ops using the Variable reside, to deduplicate copying through `Switch` and other conditional statements. name: Optional name for the variable. Defaults to `'Variable'` and gets uniquified automatically. dtype: If set, initial_value will be converted to the given type. If None, either the datatype will be kept (if initial_value is a Tensor) or float32 will be used (if it is a Python object convertible to a Tensor). expected_shape: A TensorShape. If set, initial_value is expected to have this shape. Raises: ValueError: If the initial value is not specified, or does not have a shape and `validate_shape` is `True`. """ if initial_value is None: raise ValueError("initial_value must be specified.") init_from_fn = callable(initial_value) if init_from_fn and dtype is None: raise ValueError("dtype must also be specified when initial_value is callable.") if collections is None: collections = [ops.GraphKeys.GLOBAL_VARIABLES] if not isinstance(collections, (list, tuple, set)): raise ValueError( "collections argument to Variable constructor must be a list, tuple, " "or set. Got %s of type %s" % (collections, type(collections)) ) if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES] expected_shape = tensor_shape.as_shape(expected_shape) with ops.control_dependencies(None): with ops.name_scope(name, "Variable", [] if init_from_fn else [initial_value]) as name: # Get the initial value from a callable function. The real shape of the # variable will be set later, since under the init_from_fn case, the # shape won't be known until after the function is invoked. # # NOTE: The current Variable OpKernel does not support # partially defined shapes, so we only set the shape if it is # fully defined. For historical reasons, we use the scalar # shape (`[]`) to represent an unknown or partially known # shape. A future version of the Variable ops will remove this # limitation. 
def full_shape_to_list(shape): """Returns shape as a list if shape is fully defined.""" if shape and shape.is_fully_defined(): return shape.as_list() else: return [] def assert_expected_shape(): """Asserts that the initial value has the expected shape.""" if expected_shape: expected_shape.assert_is_compatible_with(self._initial_value.get_shape()) if init_from_fn: expected_shape_list = full_shape_to_list(expected_shape) set_shape = validate_shape and expected_shape.is_fully_defined() self._variable = state_ops.variable_op( expected_shape_list, dtype.base_dtype, set_shape=set_shape, name=name ) with ops.colocate_with(self._variable.op): with ops.name_scope("Initializer"): # Colocate the tensors created by the initial_value() function # with the variable itself. self._initial_value = ops.convert_to_tensor( initial_value(), name="initial_value", dtype=dtype ) assert_expected_shape() # Or get the initial value from a Tensor or Python object. else: self._initial_value = ops.convert_to_tensor(initial_value, name="initial_value", dtype=dtype) assert_expected_shape() set_shape = validate_shape and self._initial_value.get_shape().is_fully_defined() # In this case, the variable op can't be created until after the # initial_value has been converted to a Tensor with a known type. self._variable = state_ops.variable_op( full_shape_to_list(self._initial_value.get_shape()), self._initial_value.dtype.base_dtype, set_shape=set_shape, name=name, ) # Manually overrides the variable's shape with the initial value's. if validate_shape: initial_value_shape = self._initial_value.get_shape() if not initial_value_shape.is_fully_defined(): raise ValueError("initial_value must have a shape specified: %s" % self._initial_value) self._variable.set_shape(initial_value_shape) # TODO(b/28152992): Remove the below hack modifying the node_def shape # directly once set_shape() handles it. self._variable.op.node_def.attr["shape"].shape.CopyFrom(initial_value_shape.as_proto()) # Assigns initial value. self._initializer_op = state_ops.assign( self._variable, self._initial_value, validate_shape=validate_shape ).op # TODO(vrv): Change this class to not take caching_device, but # to take the op to colocate the snapshot with, so we can use # colocation rather than devices. if caching_device is not None: with ops.device(caching_device): self._snapshot = array_ops.identity(self._variable, name="read") else: with ops.colocate_with(self._variable.op): self._snapshot = array_ops.identity(self._variable, name="read") ops.add_to_collections(collections, self) self._caching_device = caching_device self._save_slice_info = None
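# Tiny sketch of the init_from_fn branch described above: when initial_value
# is a callable, the shape is only known after the function runs inside the
# "Initializer" scope, which is why the check above insists that dtype be
# passed explicitly in that case.
import tensorflow as tf

w = tf.Variable(initial_value=lambda: tf.random_normal([3, 4]),
                dtype=tf.float32, name='w')   # dtype supplied alongside the callable
# Per the check above, omitting dtype with a callable initial_value raises:
# "dtype must also be specified when initial_value is callable."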
def _init_from_args(self, initial_value=None, trainable=True, collections=None, validate_shape=True, caching_device=None, name=None, dtype=None, expected_shape=None): """Creates a new variable from arguments. Args: initial_value: An `Output`, or Python object convertible to an `Output`, which is the initial value for the Variable. The initial value must have a shape specified unless `validate_shape` is set to False. Can also be a callable with no argument that returns the initial value when called. In that case, `dtype` must be specified. (Note that initializer functions from init_ops.py must first be bound to a shape before being used here.) trainable: If `True`, the default, also adds the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as the default list of variables to use by the `Optimizer` classes. collections: List of graph collections keys. The new variable is added to these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`. validate_shape: If `False`, allows the variable to be initialized with a value of unknown shape. If `True`, the default, the shape of `initial_value` must be known. caching_device: Optional device string or function describing where the Variable should be cached for reading. Defaults to the Variable's device. If not `None`, caches on another device. Typical use is to cache on the device where the Ops using the Variable reside, to deduplicate copying through `Switch` and other conditional statements. name: Optional name for the variable. Defaults to `'Variable'` and gets uniquified automatically. dtype: If set, initial_value will be converted to the given type. If None, either the datatype will be kept (if initial_value is a Tensor) or float32 will be used (if it is a Python object convertible to a Tensor). expected_shape: A TensorShape. If set, initial_value is expected to have this shape. Raises: ValueError: If the initial value is not specified, or does not have a shape and `validate_shape` is `True`. """ if initial_value is None: raise ValueError("initial_value must be specified.") init_from_fn = callable(initial_value) if init_from_fn and dtype is None: raise ValueError( "dtype must also be specified when initial_value is callable.") if collections is None: collections = [ops.GraphKeys.GLOBAL_VARIABLES] if not isinstance(collections, (list, tuple, set)): raise ValueError( "collections argument to Variable constructor must be a list, tuple, " "or set. Got %s of type %s" % (collections, type(collections))) if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES] expected_shape = tensor_shape.as_shape(expected_shape) with ops.control_dependencies(None): with ops.name_scope(name, "Variable", [] if init_from_fn else [initial_value]) as name: # Get the initial value from a callable function. The real shape of the # variable will be set later, since under the init_from_fn case, the # shape won't be known until after the function is invoked. # # NOTE: The current Variable OpKernel does not support # partially defined shapes, so we only set the shape if it is # fully defined. For historical reasons, we use the scalar # shape (`[]`) to represent an unknown or partially known # shape. A future version of the Variable ops will remove this # limitation. 
def full_shape_to_list(shape): """Returns shape as a list if shape is fully defined.""" if shape and shape.is_fully_defined(): return shape.as_list() else: return [] def assert_expected_shape(): """Asserts that the initial value has the expected shape.""" if expected_shape: expected_shape.assert_is_compatible_with( self._initial_value.get_shape()) if init_from_fn: expected_shape_list = full_shape_to_list(expected_shape) set_shape = validate_shape and expected_shape.is_fully_defined() self._variable = state_ops.variable_op( expected_shape_list, dtype.base_dtype, set_shape=set_shape, name=name) with ops.colocate_with(self._variable.op): with ops.name_scope("Initializer"): # Colocate the tensors created by the initial_value() function # with the variable itself. self._initial_value = ops.convert_to_tensor( initial_value(), name="initial_value", dtype=dtype) assert_expected_shape() # Or get the initial value from a Tensor or Python object. else: self._initial_value = ops.convert_to_tensor( initial_value, name="initial_value", dtype=dtype) assert_expected_shape() set_shape = (validate_shape and self._initial_value.get_shape().is_fully_defined()) # In this case, the variable op can't be created until after the # initial_value has been converted to a Tensor with a known type. self._variable = state_ops.variable_op( full_shape_to_list(self._initial_value.get_shape()), self._initial_value.dtype.base_dtype, set_shape=set_shape, name=name) # Manually overrides the variable's shape with the initial value's. if validate_shape: initial_value_shape = self._initial_value.get_shape() if not initial_value_shape.is_fully_defined(): raise ValueError("initial_value must have a shape specified: %s" % self._initial_value) self._variable.set_shape(initial_value_shape) # TODO(b/28152992): Remove the below hack modifying the node_def shape # directly once set_shape() handles it. self._variable.op.node_def.attr["shape"].shape.CopyFrom( initial_value_shape.as_proto()) # Assigns initial value. self._initializer_op = state_ops.assign( self._variable, self._initial_value, validate_shape=validate_shape).op # TODO(vrv): Change this class to not take caching_device, but # to take the op to colocate the snapshot with, so we can use # colocation rather than devices. if caching_device is not None: with ops.device(caching_device): self._snapshot = array_ops.identity(self._variable, name="read") else: with ops.colocate_with(self._variable.op): self._snapshot = array_ops.identity(self._variable, name="read") ops.add_to_collections(collections, self) self._caching_device = caching_device self._save_slice_info = None
def _create_mirrored_variable(devices, real_mirrored_creator, *args, **kwargs): # pylint: disable=g-missing-docstring # Figure out what collections this variable should be added to. # We'll add the MirroredVariable to those collections instead. collections = kwargs.pop("collections", None) if collections is None: collections = [ops.GraphKeys.GLOBAL_VARIABLES] kwargs["collections"] = [] # Get synchronization value synchronization = kwargs.get("synchronization", variable_scope.VariableSynchronization.ON_WRITE) if synchronization == variable_scope.VariableSynchronization.NONE: raise ValueError("`NONE` variable synchronization mode is not " "supported with `Mirrored` distribution strategy. Please" " change the `synchronization` for variable: " + kwargs["name"]) elif synchronization == variable_scope.VariableSynchronization.ON_READ: # Variables that are to be synced on read are tower local. is_tower_local = True kwargs["trainable"] = False elif (synchronization == variable_scope.VariableSynchronization.ON_WRITE or synchronization == variable_scope.VariableSynchronization.AUTO): # `AUTO` synchronization for `MirroredStrategy` is `ON_WRITE`. is_tower_local = False else: raise ValueError("Invalid variable synchronization mode: " + synchronization + " for variable: " + kwargs["name"]) # Get aggregation value aggregation = kwargs.pop("aggregation", variable_scope.VariableAggregation.NONE) if aggregation not in ( variable_scope.VariableAggregation.NONE, variable_scope.VariableAggregation.SUM, variable_scope.VariableAggregation.MEAN, variable_scope.VariableAggregation.ONLY_FIRST_TOWER ): raise ValueError("Invalid variable aggregation mode: " + aggregation + " for variable: " + kwargs["name"]) # Ignore user-specified caching device, not needed for mirrored variables. kwargs.pop("caching_device", None) # TODO(josh11b,apassos): It would be better if variable initialization # was never recorded on the tape instead of having to do this manually # here. with tape.stop_recording(): index = real_mirrored_creator(devices, *args, **kwargs) if is_tower_local: result = values.TowerLocalVariable(index, index[devices[0]], aggregation) else: result = values.MirroredVariable(index, index[devices[0]], aggregation) # Add the wrapped variable to the requested collections. # The handling of eager mode and the global step matches # ResourceVariable._init_from_args(). if not context.executing_eagerly(): g = ops.get_default_graph() # If "trainable" is True, next_creator() will add the member variables # to the TRAINABLE_VARIABLES collection, so we manually remove # them and replace with the MirroredVariable. We can't set # "trainable" to False for next_creator() since that causes functions # like implicit_gradients to skip those variables. if kwargs.get("trainable", True): collections.append(ops.GraphKeys.TRAINABLE_VARIABLES) l = g.get_collection_ref(ops.GraphKeys.TRAINABLE_VARIABLES) for v in index.values(): l.remove(v) g.add_to_collections(collections, result) elif ops.GraphKeys.GLOBAL_STEP in collections: ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, result) return result
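# Minimal sketch (stand-in names, not the real MirroredVariable machinery) of
# the collection surgery performed above: the inner variable that the creator
# already filed into TRAINABLE_VARIABLES is removed via get_collection_ref,
# and the wrapper object is added to the requested collections instead.
import tensorflow as tf

g = tf.get_default_graph()
inner = tf.Variable(tf.zeros([2]), trainable=True, name='inner')  # per-device variable stand-in
wrapper = tf.identity(inner, name='wrapper')                      # MirroredVariable stand-in

trainables = g.get_collection_ref(tf.GraphKeys.TRAINABLE_VARIABLES)
trainables.remove(inner)                                          # drop the contained variable
g.add_to_collections([tf.GraphKeys.GLOBAL_VARIABLES,
                      tf.GraphKeys.TRAINABLE_VARIABLES], wrapper) # file the wrapper instead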
def f1_macro(labels, predictions, num_classes, weights=None, metrics_collections=None, updates_collections=None, name=None): if context.executing_eagerly(): raise RuntimeError( 'tf1.f1_macro is not supported when eager execution is enabled.') with tf.variable_scope(name, 'f1_macro', (predictions, labels, weights)): predictions, labels, weights = _remove_squeezable_dimensions( predictions=tf.cast(predictions, dtype=tf.int32), labels=tf.cast(labels, dtype=tf.int32), weights=weights) precisions, recalls = [], [] for class_id in range(num_classes): class_labels, class_predictions = _select_class( labels=labels, predictions=predictions, class_id=class_id) precisions.append( tf.metrics.precision( labels=class_labels, predictions=class_predictions, weights=weights, metrics_collections=None, updates_collections=None, name='precision_{}'.format(class_id), )) recalls.append( tf.metrics.recall( labels=class_labels, predictions=class_predictions, weights=weights, metrics_collections=None, updates_collections=None, name='recall_{}'.format(class_id), )) def compute_f1_macro(_precisions, _recalls, _name): _precision = tf.div(tf.add_n(_precisions), num_classes) _recall = tf.div(tf.add_n(_recalls), num_classes) return 2. * tf.div_no_nan( _precision * _recall, _precision + _recall, name=_name) def once_across_towers(_, _precisions, _recalls): return compute_f1_macro(_precisions, _recalls, 'value') value = _aggregate_across_towers(metrics_collections, once_across_towers, [p for p, _ in precisions], [r for r, _ in recalls]) update_op = compute_f1_macro([p for _, p in precisions], [r for _, r in recalls], 'update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) return value, update_op
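# Worked example (plain Python, no TF) of the macro averaging implemented in
# compute_f1_macro above: per-class precision and recall are averaged first,
# and F1 is then taken of the two averages. The numbers are made up.
precisions = [0.8, 0.4, 0.6]   # hypothetical per-class precisions
recalls = [0.5, 0.7, 0.9]      # hypothetical per-class recalls

p_macro = sum(precisions) / len(precisions)          # 0.6
r_macro = sum(recalls) / len(recalls)                # 0.7
f1 = 2.0 * p_macro * r_macro / (p_macro + r_macro)   # ~0.646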
def batch_norm(inputs, decay=0.999, center=True, scale=False, epsilon=0.001, activation_fn=None, updates_collections=ops.GraphKeys.UPDATE_OPS, is_training=True, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, scope=None): """Adds a Batch Normalization layer from http://arxiv.org/abs/1502.03167. "Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift" Sergey Ioffe, Christian Szegedy Can be used as a normalizer function for conv2d and fully_connected. Args: -inputs: a tensor of size `[batch_size, height, width, channels]` or `[batch_size, channels]`. -decay: decay for the moving average. -center: If True, subtract `beta`. If False, `beta` is ignored. -scale: If True, multiply by `gamma`. If False, `gamma` is not used. When the next layer is linear (also e.g. `nn.relu`), this can be disabled since the scaling can be done by the next layer. -epsilon: small float added to variance to avoid dividing by zero. -activation_fn: Optional activation function. -updates_collections: collections to collect the update ops for computation. If None, a control dependency would be added to make sure the updates are computed. -is_training: whether or not the layer is in training mode. In training mode it would accumulate the statistics of the moments into `moving_mean` and `moving_variance` using an exponential moving average with the given `decay`. When it is not in training mode then it would use the values of the `moving_mean` and the `moving_variance`. -reuse: whether or not the layer and its variables should be reused. To be able to reuse the layer scope must be given. -variables_collections: optional collections for the variables. -outputs_collections: collections to add the outputs. -trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). -scope: Optional scope for `variable_op_scope`. Returns: a tensor representing the output of the operation. """ with variable_scope.variable_op_scope([inputs],scope, 'BatchNorm', reuse=reuse) as sc: inputs_shape = inputs.get_shape() dtype = inputs.dtype.base_dtype axis = list(range(len(inputs_shape) - 1)) params_shape = inputs_shape[-1:] # Allocate parameters for the beta and gamma of the normalization. beta, gamma = None, None if center: beta_collections = utils.get_variable_collections(variables_collections,'beta') beta = variables.model_variable('beta',shape=params_shape,dtype=dtype,initializer=init_ops.zeros_initializer,collections=beta_collections,trainable=trainable) if scale: gamma_collections = utils.get_variable_collections(variables_collections,'gamma') gamma = variables.model_variable('gamma',shape=params_shape,dtype=dtype,initializer=init_ops.ones_initializer,collections=gamma_collections,trainable=trainable) # Create moving_mean and moving_variance variables and add them to the # appropiate collections. moving_mean_collections = utils.get_variable_collections(variables_collections, 'moving_mean') moving_mean = variables.model_variable('moving_mean',shape=params_shape,dtype=dtype,initializer=init_ops.zeros_initializer,trainable=False,collections=moving_mean_collections) moving_variance_collections = utils.get_variable_collections(variables_collections, 'moving_variance') moving_variance = variables.model_variable('moving_variance',shape=params_shape,dtype=dtype,initializer=init_ops.ones_initializer,trainable=False,collections=moving_variance_collections) if is_training: # Calculate the moments based on the individual batch. 
mean, variance = nn.moments(inputs, axis, shift=moving_mean) # Update the moving_mean and moving_variance moments. update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, decay) update_moving_variance = moving_averages.assign_moving_average(moving_variance, variance, decay) if updates_collections is None: # Make sure the updates are computed here. with ops.control_dependencies([update_moving_mean,update_moving_variance]): outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) else: # Collect the updates to be computed later. ops.add_to_collections(updates_collections, update_moving_mean) ops.add_to_collections(updates_collections, update_moving_variance) outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) else: outputs = nn.batch_normalization( inputs, moving_mean, moving_variance, beta, gamma, epsilon) outputs.set_shape(inputs.get_shape()) if activation_fn: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
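# Consumer-side sketch of the updates_collections=GraphKeys.UPDATE_OPS path
# above: the moving-average updates are parked in UPDATE_OPS and must be run
# together with the training op. The stock tf.contrib.layers.batch_norm is
# used here as a stand-in for the snippet's own batch_norm.
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 16])
h = tf.contrib.layers.batch_norm(x, is_training=True)   # files its updates in UPDATE_OPS
loss = tf.reduce_mean(tf.square(h))

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):                # force the moving stats to update
    train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)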
def batch_norm_mine_old(inputs, decay=0.999, center=True, scale=False, epsilon=0.001, activation_fn=None, param_initializers=None, param_regularizers=None, updates_collections=ops.GraphKeys.UPDATE_OPS, is_training=True, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, batch_weights=None, fused=False, data_format=DATA_FORMAT_NHWC, zero_debias_moving_mean=False, scope=None, renorm=False, renorm_clipping=None, renorm_decay=0.99): """ This earlier version of my modification to batch norm uses current_mean and current_variance if is_training is True and moving_mean and moving_variance otherwise. This was leading a large divergence between the results depending upon whether the is_training set to True or not. I think ideally it should always use moving_mean and moving_variance. batch_norm_mine does this. Adds a Batch Normalization layer from http://arxiv.org/abs/1502.03167. copy of tensorflow.contrib.layers Args: inputs: A tensor with 2 or more dimensions, where the first dimension has `batch_size`. The normalization is over all but the last dimension if `data_format` is `NHWC` and the second dimension if `data_format` is `NCHW`. decay: Decay for the moving average. Reasonable values for `decay` are close to 1.0, typically in the multiple-nines range: 0.999, 0.99, 0.9, etc. Lower `decay` value (recommend trying `decay`=0.9) if model experiences reasonably good training performance but poor validation and/or test performance. Try zero_debias_moving_mean=True for improved stability. center: If True, add offset of `beta` to normalized tensor. If False, `beta` is ignored. scale: If True, multiply by `gamma`. If False, `gamma` is not used. When the next layer is linear (also e.g. `nn.relu`), this can be disabled since the scaling can be done by the next layer. epsilon: Small float added to variance to avoid dividing by zero. activation_fn: Activation function, default set to None to skip it and maintain a linear activation. param_initializers: Optional initializers for beta, gamma, moving mean and moving variance. param_regularizers: Optional regularizer for beta and gamma. updates_collections: Collections to collect the update ops for computation. The updates_ops need to be executed with the train_op. If None, a control dependency would be added to make sure the updates are computed in place. is_training: Whether or not the layer is in training mode. In training mode it would accumulate the statistics of the moments into `moving_mean` and `moving_variance` using an exponential moving average with the given `decay`. When it is not in training mode then it would use the values of the `moving_mean` and the `moving_variance`. reuse: Whether or not the layer and its variables should be reused. To be able to reuse the layer scope must be given. variables_collections: Optional collections for the variables. outputs_collections: Collections to add the outputs. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). batch_weights: An optional tensor of shape `[batch_size]`, containing a frequency weight for each batch item. If present, then the batch normalization uses weighted mean and variance. (This can be used to correct for bias in training example selection.) fused: Use nn.fused_batch_norm if True, nn.batch_normalization otherwise. data_format: A string. `NHWC` (default) and `NCHW` are supported. zero_debias_moving_mean: Use zero_debias for moving_mean. 
It creates a new pair of variables 'moving_mean/biased' and 'moving_mean/local_step'. scope: Optional scope for `variable_scope`. renorm: Whether to use Batch Renormalization (https://arxiv.org/abs/1702.03275). This adds extra variables during training. The inference is the same for either value of this parameter. renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to scalar `Tensors` used to clip the renorm correction. The correction `(r, d)` is used as `corrected_value = normalized_value * r + d`, with `r` clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin, dmax are set to inf, 0, inf, respectively. renorm_decay: Momentum used to update the moving means and standard deviations with renorm. Unlike `momentum`, this affects training and should be neither too small (which would add noise) nor too large (which would give stale estimates). Note that `decay` is still applied to get the means and variances for inference. Returns: A `Tensor` representing the output of the operation. Raises: ValueError: If `batch_weights` is not None and `fused` is True. ValueError: If `param_regularizers` is not None and `fused` is True. ValueError: If `data_format` is neither `NHWC` nor `NCHW`. ValueError: If the rank of `inputs` is undefined. ValueError: If rank or channels dimension of `inputs` is undefined. """ if fused: if batch_weights is not None: raise ValueError('Weighted mean and variance is not currently ' 'supported for fused batch norm.') if param_regularizers is not None: raise ValueError('Regularizers are not currently ' 'supported for fused batch norm.') if renorm: raise ValueError('Renorm is not supported for fused batch norm.') return _fused_batch_norm( inputs, decay=decay, center=center, scale=scale, epsilon=epsilon, activation_fn=activation_fn, param_initializers=param_initializers, updates_collections=updates_collections, is_training=is_training, reuse=reuse, variables_collections=variables_collections, outputs_collections=outputs_collections, trainable=trainable, data_format=data_format, zero_debias_moving_mean=zero_debias_moving_mean, scope=scope) if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC): raise ValueError('data_format has to be either NCHW or NHWC.') layer_variable_getter = _build_variable_getter() with variable_scope.variable_scope( scope, 'BatchNorm', [inputs], reuse=reuse, custom_getter=layer_variable_getter) as sc: inputs = ops.convert_to_tensor(inputs) # Determine whether we can use the core layer class. if (batch_weights is None and updates_collections is ops.GraphKeys.UPDATE_OPS and not zero_debias_moving_mean): # Use the core layer class. 
axis = 1 if data_format == DATA_FORMAT_NCHW else -1 if not param_initializers: param_initializers = {} beta_initializer = param_initializers.get('beta', init_ops.zeros_initializer()) gamma_initializer = param_initializers.get('gamma', init_ops.ones_initializer()) moving_mean_initializer = param_initializers.get( 'moving_mean', init_ops.zeros_initializer()) moving_variance_initializer = param_initializers.get( 'moving_variance', init_ops.ones_initializer()) if not param_regularizers: param_regularizers = {} beta_regularizer = param_regularizers.get('beta') gamma_regularizer = param_regularizers.get('gamma') layer = normalization_layers.BatchNormalization( axis=axis, momentum=decay, epsilon=epsilon, center=center, scale=scale, beta_initializer=beta_initializer, gamma_initializer=gamma_initializer, moving_mean_initializer=moving_mean_initializer, moving_variance_initializer=moving_variance_initializer, beta_regularizer=beta_regularizer, gamma_regularizer=gamma_regularizer, trainable=trainable, renorm=renorm, renorm_clipping=renorm_clipping, renorm_momentum=renorm_decay, name=sc.name, _scope=sc, _reuse=reuse) outputs = layer.apply(inputs, training=is_training) # Add variables to collections. _add_variable_to_collections( layer.moving_mean, variables_collections, 'moving_mean') _add_variable_to_collections( layer.moving_variance, variables_collections, 'moving_variance') if layer.beta: _add_variable_to_collections(layer.beta, variables_collections, 'beta') if layer.gamma: _add_variable_to_collections( layer.gamma, variables_collections, 'gamma') if activation_fn is not None: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.original_name_scope, outputs) # Not supported by layer class: batch_weights argument, # and custom updates_collections. In that case, use the legacy BN # implementation. # Custom updates collections are not supported because the update logic # is different in this case, in particular w.r.t. "forced updates" and # update op reuse. if renorm: raise ValueError('renorm is not supported with batch_weights, ' 'updates_collections or zero_debias_moving_mean') inputs_shape = inputs.get_shape() inputs_rank = inputs_shape.ndims if inputs_rank is None: raise ValueError('Inputs %s has undefined rank.' % inputs.name) dtype = inputs.dtype.base_dtype if batch_weights is not None: batch_weights = ops.convert_to_tensor(batch_weights) inputs_shape[0:1].assert_is_compatible_with(batch_weights.get_shape()) # Reshape batch weight values so they broadcast across inputs. nshape = [-1] + [1 for _ in range(inputs_rank - 1)] batch_weights = array_ops.reshape(batch_weights, nshape) if data_format == DATA_FORMAT_NCHW: moments_axes = [0] + list(range(2, inputs_rank)) params_shape = inputs_shape[1:2] # For NCHW format, rather than relying on implicit broadcasting, we # explicitly reshape the params to params_shape_broadcast when computing # the moments and the batch normalization. params_shape_broadcast = list( [1, inputs_shape[1].value] + [1 for _ in range(2, inputs_rank)]) else: moments_axes = list(range(inputs_rank - 1)) params_shape = inputs_shape[-1:] params_shape_broadcast = None if not params_shape.is_fully_defined(): raise ValueError('Inputs %s has undefined channels dimension %s.' % ( inputs.name, params_shape)) # Allocate parameters for the beta and gamma of the normalization. 
beta, gamma = None, None if not param_initializers: param_initializers = {} if center: beta_collections = utils.get_variable_collections(variables_collections, 'beta') beta_initializer = param_initializers.get('beta', init_ops.zeros_initializer()) beta = variables.model_variable('beta', shape=params_shape, dtype=dtype, initializer=beta_initializer, collections=beta_collections, trainable=trainable) if scale: gamma_collections = utils.get_variable_collections(variables_collections, 'gamma') gamma_initializer = param_initializers.get('gamma', init_ops.ones_initializer()) gamma = variables.model_variable('gamma', shape=params_shape, dtype=dtype, initializer=gamma_initializer, collections=gamma_collections, trainable=trainable) # Create moving_mean and moving_variance variables and add them to the # appropriate collections. We disable variable partitioning while creating # them, because assign_moving_average is not yet supported for partitioned # variables. partitioner = variable_scope.get_variable_scope().partitioner try: variable_scope.get_variable_scope().set_partitioner(None) moving_mean_collections = utils.get_variable_collections( variables_collections, 'moving_mean') moving_mean_initializer = param_initializers.get( 'moving_mean', init_ops.zeros_initializer()) moving_mean = variables.model_variable( 'moving_mean', shape=params_shape, dtype=dtype, initializer=moving_mean_initializer, trainable=False, collections=moving_mean_collections) moving_variance_collections = utils.get_variable_collections( variables_collections, 'moving_variance') moving_variance_initializer = param_initializers.get( 'moving_variance', init_ops.ones_initializer()) moving_variance = variables.model_variable( 'moving_variance', shape=params_shape, dtype=dtype, initializer=moving_variance_initializer, trainable=False, collections=moving_variance_collections) finally: variable_scope.get_variable_scope().set_partitioner(partitioner) # If `is_training` doesn't have a constant value, because it is a `Tensor`, # a `Variable` or `Placeholder` then is_training_value will be None and # `needs_moments` will be true. is_training_value = utils.constant_value(is_training) need_moments = is_training_value is None or is_training_value if need_moments: # Calculate the moments based on the individual batch. 
if batch_weights is None: if data_format == DATA_FORMAT_NCHW: mean, _ = nn.moments(inputs, moments_axes, keep_dims=True) variance,_ = nn.moments( (inputs-moving_mean)**2, moments_axes, keep_dims=True) mean = array_ops.reshape(mean, [-1]) variance = array_ops.reshape(variance, [-1]) else: mean, _ = nn.moments(inputs, moments_axes) variance, _ = nn.moments( (inputs-moving_mean)**2, moments_axes) else: if data_format == DATA_FORMAT_NCHW: mean, _ = nn.weighted_moments(inputs, moments_axes, batch_weights, keep_dims=True) variance, _ = nn.weighted_moments( (inputs-moving_mean)**2, moments_axes, batch_weights, keep_dims=True) mean = array_ops.reshape(mean, [-1]) variance = array_ops.reshape(variance, [-1]) else: mean, _ = nn.weighted_moments(inputs, moments_axes, batch_weights) variance, _ = nn.weighted_moments( (inputs-moving_mean)**2, moments_axes, batch_weights) moving_vars_fn = lambda: (moving_mean, moving_variance) if updates_collections is None: def _force_updates(): """Internal function forces updates moving_vars if is_training.""" update_moving_mean = moving_averages.assign_moving_average( moving_mean, mean, decay, zero_debias=zero_debias_moving_mean) update_moving_variance = moving_averages.assign_moving_average( moving_variance, variance, decay, zero_debias=False) with ops.control_dependencies([update_moving_mean, update_moving_variance]): return array_ops.identity(mean), array_ops.identity(variance) mean, variance = utils.smart_cond(is_training, _force_updates, moving_vars_fn) else: def _delay_updates(): """Internal function that delay updates moving_vars if is_training.""" update_moving_mean = moving_averages.assign_moving_average( moving_mean, mean, decay, zero_debias=zero_debias_moving_mean) update_moving_variance = moving_averages.assign_moving_average( moving_variance, variance, decay, zero_debias=False) return update_moving_mean, update_moving_variance update_mean, update_variance = utils.smart_cond(is_training, _delay_updates, moving_vars_fn) ops.add_to_collections(updates_collections, update_mean) ops.add_to_collections(updates_collections, update_variance) # Use computed moments during training and moving_vars otherwise. vars_fn = lambda: (mean, variance) mean, variance = utils.smart_cond(is_training, vars_fn, moving_vars_fn) else: mean, variance = moving_mean, moving_variance if data_format == DATA_FORMAT_NCHW: mean = array_ops.reshape(mean, params_shape_broadcast) variance = array_ops.reshape(variance, params_shape_broadcast) beta = array_ops.reshape(beta, params_shape_broadcast) if gamma is not None: gamma = array_ops.reshape(gamma, params_shape_broadcast) # Compute batch_normalization. outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) outputs.set_shape(inputs_shape) if activation_fn is not None: outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.original_name_scope, outputs)
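# NumPy illustration (synthetic data) of the modification above: variance is
# computed around the *moving* mean, E[(x - moving_mean)^2], rather than the
# batch mean, so it is always >= the plain batch variance, with equality only
# when the two means coincide.
import numpy as np

x = np.random.randn(32, 8) + 2.0          # a batch whose mean has drifted to ~2
moving_mean = np.zeros(8)                  # a stale moving mean

batch_var = x.var(axis=0)                              # nn.moments-style variance
shifted_var = np.mean((x - moving_mean) ** 2, axis=0)  # variance used by the snippet
assert np.all(shifted_var >= batch_var)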
def streaming_precision_recall_arrays(n_gbboxes, rclasses, rscores, tp_tensor, fp_tensor, remove_zero_labels=True, metrics_collections=None, updates_collections=None, name=None): """Streaming computation of precision / recall arrays. This metric keeps track of boolean True Positive and False Positive arrays. """ with variable_scope.variable_scope(name, 'stream_precision_recall', [n_gbboxes, rclasses, tp_tensor, fp_tensor]): n_gbboxes = math_ops.to_int64(n_gbboxes) rclasses = math_ops.to_int64(rclasses) rscores = math_ops.to_float(rscores) stype = tf.int32 tp_tensor = tf.cast(tp_tensor, stype) fp_tensor = tf.cast(fp_tensor, stype) # Reshape TP and FP tensors and clean away 0 class values. rclasses = tf.reshape(rclasses, [-1]) rscores = tf.reshape(rscores, [-1]) tp_tensor = tf.reshape(tp_tensor, [-1]) fp_tensor = tf.reshape(fp_tensor, [-1]) if remove_zero_labels: mask = tf.greater(rclasses, 0) rclasses = tf.boolean_mask(rclasses, mask) rscores = tf.boolean_mask(rscores, mask) tp_tensor = tf.boolean_mask(tp_tensor, mask) fp_tensor = tf.boolean_mask(fp_tensor, mask) # Local variables accumulating information over batches. v_nobjects = _create_local('v_nobjects', shape=[], dtype=tf.int64) v_ndetections = _create_local('v_ndetections', shape=[], dtype=tf.int32) v_scores = _create_local('v_scores', shape=[0, ]) v_tp = _create_local('v_tp', shape=[0, ], dtype=stype) v_fp = _create_local('v_fp', shape=[0, ], dtype=stype) # Update operations. nobjects_op = state_ops.assign_add(v_nobjects, tf.reduce_sum(n_gbboxes)) ndetections_op = state_ops.assign_add(v_ndetections, tf.size(rscores, out_type=tf.int32)) scores_op = state_ops.assign(v_scores, tf.concat([v_scores, rscores], axis=0), validate_shape=False) tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp_tensor], axis=0), validate_shape=False) fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp_tensor], axis=0), validate_shape=False) # Precision and recall computations. # r = _precision_recall(nobjects_op, scores_op, tp_op, fp_op, 'value') r = _precision_recall(v_nobjects, v_ndetections, v_scores, v_tp, v_fp, 'value') with ops.control_dependencies([nobjects_op, ndetections_op, scores_op, tp_op, fp_op]): update_op = _precision_recall(nobjects_op, ndetections_op, scores_op, tp_op, fp_op, 'update_op') # update_op = tf.Print(update_op, # [tf.reduce_sum(tf.cast(mask, tf.int64)), # tf.reduce_sum(tf.cast(mask2, tf.int64)), # tf.reduce_min(rscores), # tf.reduce_sum(n_gbboxes)], # 'Metric: ') # Some debugging stuff! # update_op = tf.Print(update_op, # [tf.shape(tp_op), # tf.reduce_sum(tf.cast(tp_op, tf.int64), axis=0)], # 'TP and FP shape: ') # update_op[0] = tf.Print(update_op, # [nobjects_op], # '# Groundtruth bboxes: ') # update_op = tf.Print(update_op, # [update_op[0][0], # update_op[0][-1], # tf.reduce_min(update_op[0]), # tf.reduce_max(update_op[0]), # tf.reduce_min(update_op[1]), # tf.reduce_max(update_op[1])], # 'Precision and recall :') if metrics_collections: ops.add_to_collections(metrics_collections, r) if updates_collections: ops.add_to_collections(updates_collections, update_op) return r, update_op
def execute(self, fn, *args, **kwargs): """Execute function `fn(*args, **kwargs)` inside the CriticalSection. Args: fn: The function to execute. Must return at least one tensor. *args: Additional positional arguments to `fn`. **kwargs: Additional keyword arguments to `fn`. Several keywords are reserved for `execute`. These are: - name; The name to use when creating the execute operation. - exclusive_resource_access; Whether the resources required by `fn` should be exclusive to this `CriticalSection`. Default: `True`. You may want to set this to `False` if you will be accessing a resource in read-only mode in two different CriticalSections. Returns: The tensors returned from `fn(*args, **kwargs)`. Raises: ValueError: If `fn` attempts to use this `CriticalSection` in any nested way. ValueError: If `exclusive_resource_access` is not provided (is `True`) and another `CriticalSection` has an execution requesting the same resources as in `*args`, `**kwargs`, and any additionaly captured inputs in `fn`. Note, even if `exclusive_resource_access` is `True`, if another execution in another `CriticalSection` was created without `exclusive_resource_access=True`, a `ValueError` will be raised. """ name = kwargs.pop("name", None) exclusive_resource_access = kwargs.pop("exclusive_resource_access", True) args = nest.map_structure(ops.convert_to_tensor, args) with ops.name_scope(name, "critical_section_execute", []): fn_op = function.make_defun_op(fn, *args, **kwargs) flat_dtypes = nest.flatten(fn_op.output_dtypes) flat_shapes = nest.flatten(fn_op.output_shapes) all_inputs = nest.flatten(args) + fn_op.captured_inputs if self._handle in all_inputs: raise ValueError("The function fn attempts to access the " "CriticalSection in which it would be running. This " "is illegal and would cause deadlocks. " "CriticalSection: %s." % self._handle) if context.in_graph_mode(): # Collections and op introspection does not work in eager # mode. This is generally ok; since eager mode (as of # writing) executes sequentially anyway. all_input_resources = [ x for x in all_inputs if x.dtype == dtypes.resource] for sg in ops.get_collection(CRITICAL_SECTION_EXECUTIONS): if sg.op.inputs[0].name == self._handle.name: # Other executions in the same critical section are allowed. continue if not (exclusive_resource_access or sg.exclusive_resource_access): # Neither execution requested exclusive access. continue sg_input_names = [y.name for y in sg.op.inputs[1:]] for res in all_input_resources: if res.name in sg_input_names: raise ValueError( "This execution would access resource %s; but either this " "execution (CriticalSection: %s) or Execution '%s' " "(CriticalSection: %s) requested exclusive resource access " "of this resource for their critical section. Did you mean " "to call execute with keyword argument " "exclusive_resource_access=False?" 
% (res.name, self.name, sg.op.name, sg.op.inputs[0].op.name)) flat_outputs = gen_resource_variable_ops.execute_in_critical_section( critical_section=self._handle, arguments=all_inputs, f=fn_op, output_types=flat_dtypes, output_shapes=flat_shapes) if context.in_graph_mode(): if isinstance(flat_outputs, ops.Operation): flat_outputs = [flat_outputs] op = (flat_outputs[0].op if isinstance(flat_outputs[0], ops.Tensor) else flat_outputs[0]) signature = _ExecutionSignature( op=op, exclusive_resource_access=exclusive_resource_access) ops.add_to_collections( CRITICAL_SECTION_EXECUTIONS, signature) return (flat_outputs[0] if (len(flat_outputs) == 1 and isinstance(flat_outputs[0], ops.Operation)) else nest.pack_sequence_as(fn_op.output_dtypes, flat_outputs))
def execute(self, fn, exclusive_resource_access=True, name=None): """Execute function `fn()` inside the critical section. `fn` should not accept any arguments. To add extra arguments to when calling `fn` in the critical section, create a lambda: ```python critical_section.execute(lambda: fn(*my_args, **my_kwargs)) ``` Args: fn: The function to execute. Must return at least one tensor. exclusive_resource_access: Whether the resources required by `fn` should be exclusive to this `CriticalSection`. Default: `True`. You may want to set this to `False` if you will be accessing a resource in read-only mode in two different CriticalSections. name: The name to use when creating the execute operation. Returns: The tensors returned from `fn()`. Raises: ValueError: If `fn` attempts to lock this `CriticalSection` in any nested or lazy way that may cause a deadlock. ValueError: If `exclusive_resource_access == True` and another `CriticalSection` has an execution requesting the same resources as `fn``. Note, even if `exclusive_resource_access` is `True`, if another execution in another `CriticalSection` was created without `exclusive_resource_access=True`, a `ValueError` will be raised. """ with ops.name_scope(name, "critical_section_execute", []): # Ensure that mutex locking only happens *after* all args and # kwargs have been executed. This avoids certain types of deadlocks. lock = gen_resource_variable_ops.mutex_lock(self._handle) if not context.executing_eagerly(): # NOTE(ebrevdo): This is to ensure we don't pick up spurious # Operations created by other threads. with ops.get_default_graph()._lock: # pylint: disable=protected-access existing_ops = ops.get_default_graph().get_operations() with ops.control_dependencies([lock]): r = fn() # TODO(ebrevdo): If creating critical sections in a python loop, this # makes graph creation time quadratic. Revisit if this # becomes a problem. created_ops = (set(ops.get_default_graph().get_operations()) .difference(existing_ops)) else: with ops.control_dependencies([lock]): r = fn() if not context.executing_eagerly(): self._add_control_dependencies_to_lock(created_ops, lock.op) # captured_resources is a list of resources that are directly # accessed only by ops created during fn(), not by any # ancestors of those ops in the graph. captured_resources = set([ input_ for op in created_ops for input_ in op.inputs if input_.dtype == dtypes.resource ]) # NOTE(ebrevdo): The only time self._is_self_handle() is True # in this call is if one of the recently created ops, within # the execute(), themselves attempt to access the # CriticalSection. This will cause a deadlock. if any(self._is_self_handle(x) for x in captured_resources): raise ValueError("The function fn attempts to directly access the " "CriticalSection in which it would be running. " "This is illegal and would cause deadlocks.") self._check_multiple_access_to_resources( captured_resources, exclusive_resource_access) r_flat = [_identity(x) for x in nest.flatten(r)] with ops.control_dependencies(r_flat): # The identity must run on the same machine as self._handle with ops.colocate_with(self._handle): # Do not use array_ops.identity as there are special # optimizations within TensorFlow which seem to elide it # even when optimizations are disabled(!). ensure_lock_exists = gen_resource_variable_ops.consume_mutex_lock( lock) # Make sure that if any element of r is accessed, all of # them are executed together. 
r = nest.pack_sequence_as(r, control_flow_ops.tuple(nest.flatten(r))) with ops.control_dependencies([ensure_lock_exists]): outputs = nest.map_structure(_identity, r) if not context.executing_eagerly(): signature = _ExecutionSignature( op=lock.op, handle=self._handle, resources=list(captured_resources), exclusive_resource_access=exclusive_resource_access) ops.add_to_collections( CRITICAL_SECTION_EXECUTIONS, signature) return outputs
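# Hedged usage sketch for the zero-argument execute() above; extra arguments
# are bound with a lambda as the docstring suggests. Assumes a TF build that
# exposes CriticalSection (tf.CriticalSection in recent releases; older ones
# shipped it as tf.contrib.framework.CriticalSection).
import tensorflow as tf

cs = tf.CriticalSection(name='counter_section')
counter = tf.Variable(0, dtype=tf.int32, name='counter')

bump_one = cs.execute(lambda: counter.assign_add(1))    # serialized increment
bump_five = cs.execute(lambda: counter.assign_add(5))   # extra args bound via the lambda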
def f1_score(labels, predictions, weights=None, num_thresholds=200, metrics_collections=None, updates_collections=None, name=None): """Computes the approximately best F1-score across different thresholds. The f1_score function applies a range of thresholds to the predictions to convert them from [0, 1] to bool. Precision and recall are computed by comparing them to the labels. The F1-Score is then defined as 2 * precision * recall / (precision + recall). The best one across the thresholds is returned. Disclaimer: In practice it may be desirable to choose the best threshold on the validation set and evaluate the F1 score with this threshold on a separate test set. Or it may be desirable to use a fixed threshold (e.g. 0.5). This function internally creates four local variables, `true_positives`, `true_negatives`, `false_positives` and `false_negatives` that are used to compute the pairs of recall and precision values for a linearly spaced set of thresholds from which the best f1-score is derived. This value is ultimately returned as `f1-score`, an idempotent operation that computes the F1-score (computed using the aforementioned variables). The `num_thresholds` variable controls the degree of discretization with larger numbers of thresholds more closely approximating the true best F1-score. For estimation of the metric over a stream of data, the function creates an `update_op` operation that updates these variables and returns the F1-score. Example usage with a custom estimator: def model_fn(features, labels, mode): predictions = make_predictions(features) loss = make_loss(predictions, labels) train_op = tf.contrib.training.create_train_op( total_loss=loss, optimizer='Adam') eval_metric_ops = {'f1': f1_score(labels, predictions)} return tf.estimator.EstimatorSpec( mode=mode, predictions=predictions, loss=loss, train_op=train_op, eval_metric_ops=eval_metric_ops, export_outputs=export_outputs) estimator = tf.estimator.Estimator(model_fn=model_fn) If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: labels: A `Tensor` whose shape matches `predictions`. Will be cast to `bool`. predictions: A floating point `Tensor` of arbitrary shape and whose values are in the range `[0, 1]`. weights: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` dimension). num_thresholds: The number of thresholds to use when discretizing the roc curve. metrics_collections: An optional list of collections that `f1_score` should be added to. updates_collections: An optional list of collections that `update_op` should be added to. name: An optional variable_scope name. Returns: f1_score: A scalar `Tensor` representing the current best f1-score across different thresholds. update_op: An operation that increments the `true_positives`, `true_negatives`, `false_positives` and `false_negatives` variables appropriately and whose value matches the `f1_score`. Raises: ValueError: If `predictions` and `labels` have mismatched shapes, or if `weights` is not `None` and its shape doesn't match `predictions`, or if either `metrics_collections` or `updates_collections` are not a list or tuple. 
""" with variable_scope.variable_scope( name, 'f1', (labels, predictions, weights)): predictions, labels, weights = metrics_impl._remove_squeezable_dimensions( # pylint: disable=protected-access predictions=predictions, labels=labels, weights=weights) # To account for floating point imprecisions / avoid division by zero. epsilon = 1e-7 thresholds = [(i + 1) * 1.0 / (num_thresholds - 1) for i in range(num_thresholds - 2)] thresholds = [0.0 - epsilon] + thresholds + [1.0 + epsilon] # Confusion matrix. values, update_ops = metrics_impl._confusion_matrix_at_thresholds( # pylint: disable=protected-access labels, predictions, thresholds, weights, includes=('tp', 'fp', 'fn')) # Compute precision and recall at various thresholds. def compute_best_f1_score(tp, fp, fn, name): precision_at_t = math_ops.div(tp, epsilon + tp + fp, name='precision_' + name) recall_at_t = math_ops.div(tp, epsilon + tp + fn, name='recall_' + name) # Compute F1 score. f1_at_thresholds = ( 2.0 * precision_at_t * recall_at_t / (precision_at_t + recall_at_t + epsilon)) return math_ops.reduce_max(f1_at_thresholds) def f1_across_replicas(_, values): best_f1 = compute_best_f1_score(tp=values['tp'], fp=values['fp'], fn=values['fn'], name='value') if metrics_collections: ops.add_to_collections(metrics_collections, best_f1) return best_f1 best_f1 = distribution_strategy_context.get_replica_context().merge_call( f1_across_replicas, values) update_op = compute_best_f1_score(tp=update_ops['tp'], fp=update_ops['fp'], fn=update_ops['fn'], name='update') if updates_collections: ops.add_to_collections(updates_collections, update_op) return best_f1, update_op
def _auc(labels, predictions, weights=None, num_thresholds=200, metrics_collections=None, updates_collections=None, curve='ROC', name=None, summation_method='trapezoidal'): """Computes the approximate AUC via a Riemann sum. Modified version of tf.metrics.auc. Add support for AUC computation of the recall curve. """ with tf.variable_scope(name, 'auc', (labels, predictions, weights)): if curve != 'ROC' and curve != 'PR' and curve != 'R': raise ValueError('curve must be either ROC, PR or R, %s unknown' % (curve)) kepsilon = 1e-7 # to account for floating point imprecisions thresholds = [(i + 1) * 1.0 / (num_thresholds - 1) for i in range(num_thresholds - 2)] thresholds = [0.0 - kepsilon] + thresholds + [1.0 + kepsilon] values, update_ops = _confusion_matrix_at_thresholds( labels, predictions, thresholds, weights) # Add epsilons to avoid dividing by 0. epsilon = 1.0e-6 def compute_auc(tp, fn, tn, fp, name): """Computes the roc-auc or pr-auc based on confusion counts.""" rec = tf.div(tp + epsilon, tp + fn + epsilon) if curve == 'ROC': fp_rate = tf.div(fp, fp + tn + epsilon) x = fp_rate y = rec elif curve == 'R': # recall auc x = tf.linspace(1., 0., num_thresholds) y = rec else: # curve == 'PR'. prec = tf.div(tp + epsilon, tp + fp + epsilon) x = rec y = prec if summation_method == 'trapezoidal': return tf.reduce_sum(tf.multiply( x[:num_thresholds - 1] - x[1:], (y[:num_thresholds - 1] + y[1:]) / 2.), name=name) elif summation_method == 'minoring': return tf.reduce_sum(tf.multiply( x[:num_thresholds - 1] - x[1:], tf.minimum(y[:num_thresholds - 1], y[1:])), name=name) elif summation_method == 'majoring': return tf.reduce_sum(tf.multiply( x[:num_thresholds - 1] - x[1:], tf.maximum(y[:num_thresholds - 1], y[1:])), name=name) else: raise ValueError('Invalid summation_method: %s' % summation_method) # sum up the areas of all the trapeziums auc_value = compute_auc(values['tp'], values['fn'], values['tn'], values['fp'], 'value') update_op = compute_auc(update_ops['tp'], update_ops['fn'], update_ops['tn'], update_ops['fp'], 'update_op') if metrics_collections: ops.add_to_collections(metrics_collections, auc_value) if updates_collections: ops.add_to_collections(updates_collections, update_op) return auc_value, update_op
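# NumPy illustration of the three summation_method options above on a tiny
# descending-x curve; the thresholds sweep x from high to low, which is why
# the snippet uses x[:num_thresholds - 1] - x[1:] as the bin widths.
import numpy as np

x = np.array([1.0, 0.6, 0.2, 0.0])   # e.g. false-positive rate per threshold
y = np.array([1.0, 0.9, 0.5, 0.0])   # e.g. recall per threshold
dx = x[:-1] - x[1:]

trapezoidal = np.sum(dx * (y[:-1] + y[1:]) / 2.0)    # ~0.71, interpolated area
minoring = np.sum(dx * np.minimum(y[:-1], y[1:]))    # ~0.56, lower Riemann sum
majoring = np.sum(dx * np.maximum(y[:-1], y[1:]))    # ~0.86, upper Riemann sum
assert minoring <= trapezoidal <= majoring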
def _apply_activation(y, activation_fn, output_collections): if activation_fn: y = activation_fn(y) ops.add_to_collections(list(output_collections or []) + [ops.GraphKeys.ACTIVATIONS], y) return y
def _init_from_args(self, initial_value=None, trainable=True, collections=None, validate_shape=True, name=None): """Creates a new variable from arguments. Args: initial_value: A `Tensor`, or Python object convertible to a `Tensor`. The initial value for the Variable. Must have a shape specified unless `validate_shape` is set to False. trainable: If `True`, the default, also adds the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as the default list of variables to use by the `Optimizer` classes. collections: List of graph collections keys. The new variable is added to these collections. Defaults to `[GraphKeys.VARIABLES]`. validate_shape: If `False`, allows the variable to be initialized with a value of unknown shape. If `True`, the default, the shape of `initial_value` must be known. name: Optional name for the variable. Defaults to `'Variable'` and gets uniquified automatically. Raises: ValueError: If the initial value is not specified, or does not have a shape and `validate_shape` is `True`. """ if initial_value is None: raise ValueError("initial_value must be specified.") if collections is None: collections = [ops.GraphKeys.VARIABLES] if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ ops.GraphKeys.TRAINABLE_VARIABLES ] with ops.control_dependencies(None): with ops.op_scope([initial_value], name, "Variable") as name: self._initial_value = ops.convert_to_tensor( initial_value, name="initial_value") initial_value_shape = self._initial_value.get_shape() if validate_shape and not initial_value_shape.is_fully_defined( ): raise ValueError( "initial_value must have a shape specified: %s" % self._initial_value) shape_to_set = initial_value_shape if validate_shape else [] self._variable = state_ops.variable_op( shape_to_set, self._initial_value.dtype.base_dtype, set_shape=validate_shape, name=name) with ops.device(self._variable.device): self._initializer_op = state_ops.assign( self._variable, self._initial_value, validate_shape=validate_shape).op self._snapshot = array_ops.identity(self._variable, name="read") ops.add_to_collections(collections, self) self._save_slice_info = None
def _init_from_args(self, initial_value=None, trainable=True, collections=None, validate_shape=True, caching_device=None, name=None, dtype=None, constraint=None): """Creates a variable. Args: initial_value: A `Tensor`, or Python object convertible to a `Tensor`, which is the initial value for the Variable. The initial value must have a shape specified unless `validate_shape` is set to False. Can also be a callable with no argument that returns the initial value when called. (Note that initializer functions from init_ops.py must first be bound to a shape before being used here.) trainable: If `True`, the default, also adds the variable to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as the default list of variables to use by the `Optimizer` classes. collections: List of graph collections keys. The new variable is added to these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`. validate_shape: Ignored. Provided for compatibility with tf.Variable. caching_device: Optional device string or function describing where the Variable should be cached for reading. Defaults to the Variable's device. If not `None`, caches on another device. Typical use is to cache on the device where the Ops using the Variable reside, to deduplicate copying through `Switch` and other conditional statements. name: Optional name for the variable. Defaults to `'Variable'` and gets uniquified automatically. dtype: If set, initial_value will be converted to the given type. If None, either the datatype will be kept (if initial_value is a Tensor) or float32 will be used (if it is a Python object convertible to a Tensor). constraint: An optional projection function to be applied to the variable after being updated by an `Optimizer` (e.g. used to implement norm constraints or value constraints for layer weights). The function must take as input the unprojected Tensor representing the value of the variable and return the Tensor for the projected value (which must have the same shape). Constraints are not safe to use when doing asynchronous distributed training. Raises: ValueError: If the initial value is not specified, or does not have a shape and `validate_shape` is `True`. @compatibility(eager) When Eager Execution is enabled, variables are never added to collections. It is not implicitly added to the `GLOBAL_VARIABLES` or `TRAINABLE_VARIABLES` collections, and the `collections` argument is ignored. @end_compatibility """ if initial_value is None: raise ValueError("initial_value must be specified.") init_from_fn = callable(initial_value) if collections is None: collections = [ops.GraphKeys.GLOBAL_VARIABLES] if not isinstance(collections, (list, tuple, set)): raise ValueError( "collections argument to Variable constructor must be a list, tuple, " "or set. Got %s of type %s" % (collections, type(collections))) if constraint is not None and not callable(constraint): raise ValueError("The `constraint` argument must be a callable.") if isinstance(initial_value, checkpointable.CheckpointInitialValue): self._maybe_initialize_checkpointable() self._update_uid = initial_value.checkpoint_position.restore_uid initial_value = initial_value.wrapped_value self._trainable = trainable if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES] self._save_slice_info = None # Store the graph key so optimizers know how to only retrieve variables from # this graph. 
self._graph_key = ops.get_default_graph()._graph_key # pylint: disable=protected-access with ops.init_scope(): self._in_graph_mode = context.in_graph_mode() with ops.name_scope(name, "Variable", [] if init_from_fn else [initial_value]) as name: # pylint: disable=protected-access handle_name = ops._name_from_scope_name(name) if init_from_fn: # Use attr_scope and device(None) to simulate the behavior of # colocate_with when the variable we want to colocate with doesn't # yet exist. if self._in_graph_mode: attr = attr_value_pb2.AttrValue( list=attr_value_pb2.AttrValue.ListValue( s=[compat.as_bytes("loc:@%s" % handle_name)])) with ops.get_default_graph()._attr_scope({"_class": attr}): with ops.name_scope("Initializer"), ops.device(None): initial_value = ops.convert_to_tensor( initial_value(), name="initial_value", dtype=dtype) self._handle = _eager_safe_variable_handle( shape=initial_value.get_shape(), dtype=initial_value.dtype.base_dtype, shared_name=handle_name, name=name, graph_mode=self._in_graph_mode) self._handle_device = ( self._handle.device if self._in_graph_mode else context.get_default_context().device_name) self._shape = initial_value.get_shape() else: initial_value = initial_value() with ops.name_scope("Initializer"): initial_value = ops.convert_to_tensor( initial_value, name="initial_value", dtype=dtype) self._handle = _eager_safe_variable_handle( shape=initial_value.get_shape(), dtype=initial_value.dtype.base_dtype, shared_name=handle_name, name=name, graph_mode=False) self._handle_device = ( self._handle.device if self._in_graph_mode else context.get_default_context().device_name) self._shape = initial_value.get_shape() # pylint: enable=protected-access # Or get the initial value from a Tensor or Python object. else: with ops.name_scope("Initializer"): initial_value = ops.convert_to_tensor( initial_value, name="initial_value", dtype=dtype) # pylint: disable=protected-access if (self._in_graph_mode and initial_value is not None and initial_value.op._get_control_flow_context() is not None): raise ValueError( "Initializer for variable %s is from inside a control-flow " "construct, such as a loop or conditional. When creating a " "variable inside a loop or conditional, use a lambda as the " "initializer." % name) # pylint: enable=protected-access self._handle = _eager_safe_variable_handle( shape=initial_value.get_shape(), dtype=initial_value.dtype.base_dtype, shared_name=handle_name, name=name, graph_mode=self._in_graph_mode) self._handle_device = (self._handle.device if self._in_graph_mode else context.get_default_context().device_name) self._shape = initial_value.get_shape() self._initial_value = initial_value if self._in_graph_mode else None self._handle_name = handle_name + ":0" self._dtype = initial_value.dtype.base_dtype self._constraint = constraint if self._in_graph_mode: with ops.name_scope("IsInitialized"): self._is_initialized_op = ( gen_resource_variable_ops.var_is_initialized_op(self._handle)) if initial_value is not None: with ops.name_scope("Assign") as n, ops.colocate_with(self._handle): self._initializer_op = ( gen_resource_variable_ops.assign_variable_op( self._handle, self._try_guard_against_uninitialized_dependencies( initial_value), name=n)) with ops.name_scope("Read"), ops.colocate_with(self._handle): # Manually assign reads to the handle's device to avoid log # messages. 
with ops.device(self._handle_device): value = self._read_variable_op() self._graph_element = value if caching_device is not None: # Variables may be created in a tf.device() or ops.colocate_with() # context. At the same time, users would expect caching device to # be independent of this context, and/or would not expect the # current device context to be merged with the caching device # spec. Therefore we reset the colocation stack before creating # the cached value. Note that resetting the colocation stack will # also reset the device stack. with ops.colocate_with(None, ignore_existing=True): with ops.device(caching_device): self._cached_value = array_ops.identity(value) else: self._cached_value = None else: gen_resource_variable_ops.assign_variable_op(self._handle, initial_value) self._is_initialized_op = None self._initializer_op = None self._graph_element = None if caching_device: with ops.device(caching_device): self._cached_value = self._read_variable_op() else: self._cached_value = None if context.in_graph_mode(): ops.add_to_collections(collections, self) elif ops.GraphKeys.GLOBAL_STEP in collections: ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, self) if not self._in_graph_mode: # After the handle has been created, set up a way to clean it up when # executing eagerly. We'll hold the only reference to the deleter, so that # when this object is garbage collected the deleter will be too. This # means ResourceVariables can be part of reference cycles without those # cycles being uncollectable, and means that no __del__ will be defined at # all in graph mode. self._handle_deleter = EagerResourceDeleter( handle=self._handle, handle_device=self._handle_device)
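The caching_device behavior implemented above is also reachable through the public TF 1.x variable-creation APIs. The sketch below is illustrative only: the device strings, variable name, and shape are made up, and it assumes graph mode with tf.get_variable available.

import tensorflow as tf  # assumption: a TF 1.x-style build with tf.get_variable

# The variable itself lives on the parameter server, but reads are cached on
# the worker that consumes them, so the value can be reused by ops on that
# worker instead of being re-fetched for every consuming op.
with tf.device("/job:ps/task:0"):
  w = tf.get_variable(
      "w", shape=[1024, 1024],
      caching_device="/job:worker/task:0")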
def execute(self, fn, *args, **kwargs): """Execute function `fn(*args, **kwargs)` inside the CriticalSection. Args: fn: The function to execute. Must return at least one tensor. *args: Additional positional arguments to `fn`. **kwargs: Additional keyword arguments to `fn`. Several keywords are reserved for `execute`. These are: - name: The name to use when creating the execute operation. - exclusive_resource_access: Whether the resources required by `fn` should be exclusive to this `CriticalSection`. Default: `True`. You may want to set this to `False` if you will be accessing a resource in read-only mode in two different CriticalSections. Returns: The tensors returned from `fn(*args, **kwargs)`. Raises: ValueError: If `fn` attempts to use this `CriticalSection` in any nested way. ValueError: If `exclusive_resource_access` is not provided (is `True`) and another `CriticalSection` has an execution requesting the same resources as in `*args`, `**kwargs`, and any additionally captured inputs in `fn`. Note that even if `exclusive_resource_access` is `True`, if another execution in another `CriticalSection` was created without `exclusive_resource_access=True`, a `ValueError` will be raised. """ name = kwargs.pop("name", None) exclusive_resource_access = kwargs.pop("exclusive_resource_access", True) with ops.name_scope(name, "critical_section_execute", []): lock = gen_resource_variable_ops.mutex_lock(self._handle) with ops.control_dependencies([lock]): c_known_ops = set() c_captured_tensors = set() def add_op_internal(op): c_known_ops.add(op) for i in op.inputs: if i.op not in c_known_ops: c_captured_tensors.add(i) c = function.HelperContext(add_op_internal) with c: r = fn(*args, **kwargs) resource_inputs = set([ x for x in list(nest.flatten(args)) + nest.flatten(kwargs.values()) + list(c_captured_tensors) if tensor_util.is_tensor(x) and x.dtype == dtypes.resource ]) if self._handle in resource_inputs: raise ValueError( "The function fn attempts to access the " "CriticalSection in which it would be running. " "This is illegal and would cause deadlocks. " "CriticalSection: %s." % self._handle) if not context.executing_eagerly(): # Collections and op introspection do not work in eager # mode. This is generally ok, since eager mode (as of # writing) executes sequentially anyway. for sg in ops.get_collection(CRITICAL_SECTION_EXECUTIONS): sg_handle_name = ops.convert_to_tensor(sg.handle).name self_handle_name = ops.convert_to_tensor(self._handle).name if sg_handle_name == self_handle_name: # Other executions in the same critical section are allowed. continue if not (exclusive_resource_access or sg.exclusive_resource_access): # Neither execution requested exclusive access. continue resource_intersection = resource_inputs.intersection( sg.resources) if resource_intersection: raise ValueError( "This execution would access resources: %s. Either this " "lock (CriticalSection: %s) or lock '%s' " "(CriticalSection: %s) requested exclusive resource access " "of this resource. Did you mean to call execute with keyword " "argument exclusive_resource_access=False?" 
% (list(resource_intersection), self._handle.name, sg.op.name, sg.handle.name)) def identity(x): # pylint: disable=invalid-name if isinstance(x, tensor_array_ops.TensorArray): return x.identity() elif isinstance(x, ops.Operation): return control_flow_ops.group(x) elif context.executing_eagerly() and x is None: return None else: return array_ops.identity(x) r_flat = [identity(x) for x in nest.flatten(r)] with ops.control_dependencies(r_flat): # The identity must run on the same machine as self._handle with ops.colocate_with(self._handle): # Do not use array_ops.identity as there are special # optimizations within TensorFlow which seem to elide it # even when optimizations are disabled(!). ensure_lock_exists = gen_resource_variable_ops.consume_mutex_lock( lock) # Make sure that if any element of r is accessed, all of # them are executed together. r = nest.pack_sequence_as( r, control_flow_ops.tuple(nest.flatten(r))) with ops.control_dependencies([ensure_lock_exists]): outputs = nest.map_structure(identity, r) if not context.executing_eagerly(): signature = _ExecutionSignature( op=lock.op, handle=self._handle, resources=list(resource_inputs), exclusive_resource_access=exclusive_resource_access) ops.add_to_collections(CRITICAL_SECTION_EXECUTIONS, signature) return outputs
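As a usage sketch of the method above: the critical section serializes the whole read-modify-write, so concurrent calls to execute cannot interleave between the read and the assign. This assumes a TensorFlow release that exposes the class as tf.CriticalSection (in older releases it lived under tf.contrib) and that execute accepts a zero-argument callable, as in the signature above.

import tensorflow as tf  # assumption: a build exposing tf.CriticalSection

v = tf.Variable(0.0)
cs = tf.CriticalSection()

def add_one():
  # Runs with the section's mutex held, so this read-modify-write cannot
  # interleave with another execute() on the same CriticalSection.
  return v.assign_add(1.0)

result = cs.execute(add_one)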
def streaming_pearson_correlation(predictions, labels, weights=None, metrics_collections=None, updates_collections=None, name=None): """Computes Pearson correlation coefficient between `predictions`, `labels`. The `streaming_pearson_correlation` function delegates to `streaming_covariance` the tracking of three [co]variances: - `streaming_covariance(predictions, labels)`, i.e. covariance - `streaming_covariance(predictions, predictions)`, i.e. variance - `streaming_covariance(labels, labels)`, i.e. variance The product-moment correlation ultimately returned is an idempotent operation `cov(predictions, labels) / sqrt(var(predictions) * var(labels))`. To facilitate correlation computation across multiple batches, the function groups the `update_op`s of the underlying streaming_covariance and returns an `update_op`. If `weights` is not None, then it is used to compute a weighted correlation. NOTE: these weights are treated as "frequency weights", as opposed to "reliability weights". See discussion of the difference on https://wikipedia.org/wiki/Weighted_arithmetic_mean#Weighted_sample_variance Args: predictions: A `Tensor` of arbitrary size. labels: A `Tensor` of the same size as predictions. weights: Optional `Tensor` indicating the frequency with which an example is sampled. Rank must be 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` dimension). metrics_collections: An optional list of collections that the metric value variable should be added to. updates_collections: An optional list of collections that the metric update ops should be added to. name: An optional variable_scope name. Returns: pearson_r: A `Tensor` representing the current Pearson product-moment correlation coefficient, the value of `cov(predictions, labels) / sqrt(var(predictions) * var(labels))`. update_op: An operation that updates the underlying variables appropriately. Raises: ValueError: If `labels` and `predictions` are of different sizes, or if `weights` is the wrong size, or if either `metrics_collections` or `updates_collections` are not a `list` or `tuple`. """ with variable_scope.variable_scope(name, 'pearson_r', (predictions, labels, weights)): predictions, labels, weights = metrics_impl._remove_squeezable_dimensions( # pylint: disable=protected-access predictions, labels, weights) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) # Broadcast weights here to avoid duplicate broadcasting in each call to # `streaming_covariance`. if weights is not None: weights = weights_broadcast_ops.broadcast_weights(weights, labels) cov, update_cov = streaming_covariance(predictions, labels, weights=weights, name='covariance') var_predictions, update_var_predictions = streaming_covariance( predictions, predictions, weights=weights, name='variance_predictions') var_labels, update_var_labels = streaming_covariance( labels, labels, weights=weights, name='variance_labels') pearson_r = math_ops.truediv(cov, math_ops.multiply( math_ops.sqrt(var_predictions), math_ops.sqrt(var_labels)), name='pearson_r') update_op = math_ops.truediv(update_cov, math_ops.multiply( math_ops.sqrt(update_var_predictions), math_ops.sqrt(update_var_labels)), name='update_op') if metrics_collections: ops.add_to_collections(metrics_collections, pearson_r) if updates_collections: ops.add_to_collections(updates_collections, update_op) return pearson_r, update_op
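Numerically, the value returned above is the ordinary Pearson product-moment correlation; the unbiased (count - 1) normalization used by streaming_covariance cancels in the ratio. A small NumPy check on made-up data:

import numpy as np

# Hypothetical predictions and labels for a single batch.
predictions = np.array([0.2, 0.8, 0.5, 0.9])
labels = np.array([0.0, 1.0, 1.0, 1.0])

# Unbiased (sample) covariance and standard deviations, matching the
# comoment / (count - 1) normalization used by streaming_covariance.
cov = np.cov(predictions, labels, ddof=1)[0, 1]
pearson_r = cov / (np.std(predictions, ddof=1) * np.std(labels, ddof=1))

# The ddof factor cancels in the ratio, so this agrees with np.corrcoef.
assert np.isclose(pearson_r, np.corrcoef(predictions, labels)[0, 1])
print(pearson_r)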
def streaming_covariance(predictions, labels, weights=None, metrics_collections=None, updates_collections=None, name=None): """Computes the unbiased sample covariance between `predictions` and `labels`. The `streaming_covariance` function creates four local variables, `comoment`, `mean_prediction`, `mean_label`, and `count`, which are used to compute the sample covariance between predictions and labels across multiple batches of data. The covariance is ultimately returned as an idempotent operation that simply divides `comoment` by `count` - 1. We use `count` - 1 in order to get an unbiased estimate. The algorithm used for this online computation is described in https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance. Specifically, the formula used to combine two sample comoments is `C_AB = C_A + C_B + (E[x_A] - E[x_B]) * (E[y_A] - E[y_B]) * n_A * n_B / n_AB` The comoment for a single batch of data is simply `sum((x - E[x]) * (y - E[y]))`, optionally weighted. If `weights` is not None, then it is used to compute weighted comoments, means, and count. NOTE: these weights are treated as "frequency weights", as opposed to "reliability weights". See discussion of the difference on https://wikipedia.org/wiki/Weighted_arithmetic_mean#Weighted_sample_variance To facilitate the computation of covariance across multiple batches of data, the function creates an `update_op` operation, which updates underlying variables and returns the updated covariance. Args: predictions: A `Tensor` of arbitrary size. labels: A `Tensor` of the same size as `predictions`. weights: Optional `Tensor` indicating the frequency with which an example is sampled. Rank must be 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` dimension). metrics_collections: An optional list of collections that the metric value variable should be added to. updates_collections: An optional list of collections that the metric update ops should be added to. name: An optional variable_scope name. Returns: covariance: A `Tensor` representing the current unbiased sample covariance, `comoment` / (`count` - 1). update_op: An operation that updates the local variables appropriately. Raises: ValueError: If labels and predictions are of different sizes or if either `metrics_collections` or `updates_collections` are not a list or tuple. 
""" with variable_scope.variable_scope(name, 'covariance', (predictions, labels, weights)): predictions, labels, weights = metrics_impl._remove_squeezable_dimensions( # pylint: disable=protected-access predictions, labels, weights) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) count_ = metrics_impl.metric_variable([], dtypes.float32, name='count') mean_prediction = metrics_impl.metric_variable([], dtypes.float32, name='mean_prediction') mean_label = metrics_impl.metric_variable([], dtypes.float32, name='mean_label') comoment = metrics_impl.metric_variable( # C_A in update equation [], dtypes.float32, name='comoment') if weights is None: batch_count = math_ops.cast(array_ops.size(labels), dtypes.float32) # n_B in eqn weighted_predictions = predictions weighted_labels = labels else: weights = weights_broadcast_ops.broadcast_weights(weights, labels) batch_count = math_ops.reduce_sum(weights) # n_B in eqn weighted_predictions = math_ops.multiply(predictions, weights) weighted_labels = math_ops.multiply(labels, weights) update_count = state_ops.assign_add(count_, batch_count) # n_AB in eqn prev_count = update_count - batch_count # n_A in update equation # We update the means by Delta=Error*BatchCount/(BatchCount+PrevCount) # batch_mean_prediction is E[x_B] in the update equation batch_mean_prediction = math_ops.div_no_nan( math_ops.reduce_sum(weighted_predictions), batch_count) delta_mean_prediction = math_ops.div_no_nan( (batch_mean_prediction - mean_prediction) * batch_count, update_count) update_mean_prediction = state_ops.assign_add(mean_prediction, delta_mean_prediction) # prev_mean_prediction is E[x_A] in the update equation prev_mean_prediction = update_mean_prediction - delta_mean_prediction # batch_mean_label is E[y_B] in the update equation batch_mean_label = math_ops.div_no_nan( math_ops.reduce_sum(weighted_labels), batch_count) delta_mean_label = math_ops.div_no_nan( (batch_mean_label - mean_label) * batch_count, update_count) update_mean_label = state_ops.assign_add(mean_label, delta_mean_label) # prev_mean_label is E[y_A] in the update equation prev_mean_label = update_mean_label - delta_mean_label unweighted_batch_coresiduals = ((predictions - batch_mean_prediction) * (labels - batch_mean_label)) # batch_comoment is C_B in the update equation if weights is None: batch_comoment = math_ops.reduce_sum(unweighted_batch_coresiduals) else: batch_comoment = math_ops.reduce_sum(unweighted_batch_coresiduals * weights) # View delta_comoment as = C_AB - C_A in the update equation above. # Since C_A is stored in a var, by how much do we need to increment that var # to make the var = C_AB? delta_comoment = (batch_comoment + (prev_mean_prediction - batch_mean_prediction) * (prev_mean_label - batch_mean_label) * (prev_count * batch_count / update_count)) update_comoment = state_ops.assign_add(comoment, delta_comoment) covariance = array_ops.where(math_ops.less_equal(count_, 1.), float('nan'), math_ops.truediv(comoment, count_ - 1), name='covariance') with ops.control_dependencies([update_comoment]): update_op = array_ops.where(math_ops.less_equal(count_, 1.), float('nan'), math_ops.truediv(comoment, count_ - 1), name='update_op') if metrics_collections: ops.add_to_collections(metrics_collections, covariance) if updates_collections: ops.add_to_collections(updates_collections, update_op) return covariance, update_op
def streaming_tp_fp_arrays(num_gbboxes, tp, fp, scores, remove_zero_scores=True, metrics_collections=None, updates_collections=None, name=None): """Streaming computation of True and False Positive arrays. This metric also keeps track of scores and the number of groundtruth objects. """ # Input dictionaries: dict outputs as streaming metrics. if isinstance(scores, dict) or isinstance(fp, dict): d_values = {} d_update_ops = {} for c in num_gbboxes.keys(): scope = 'streaming_tp_fp_%s' % c v, up = streaming_tp_fp_arrays(num_gbboxes[c], tp[c], fp[c], scores[c], remove_zero_scores, metrics_collections, updates_collections, name=scope) d_values[c] = v d_update_ops[c] = up return d_values, d_update_ops # Input Tensors... with variable_scope.variable_scope(name, 'streaming_tp_fp', [num_gbboxes, tp, fp, scores]): num_gbboxes = math_ops.to_int64(num_gbboxes) scores = math_ops.to_float(scores) stype = tf.bool tp = tf.cast(tp, stype) fp = tf.cast(fp, stype) # Reshape TP and FP tensors and clean away 0 class values. scores = tf.reshape(scores, [-1]) tp = tf.reshape(tp, [-1]) fp = tf.reshape(fp, [-1]) # Remove entries where TP and FP are both false. mask = tf.logical_or(tp, fp) if remove_zero_scores: rm_threshold = 1e-4 mask = tf.logical_and(mask, tf.greater(scores, rm_threshold)) scores = tf.boolean_mask(scores, mask) tp = tf.boolean_mask(tp, mask) fp = tf.boolean_mask(fp, mask) # Local variables accumulating information over batches. v_nobjects = _create_local('v_num_gbboxes', shape=[], dtype=tf.int64) v_ndetections = _create_local('v_num_detections', shape=[], dtype=tf.int32) v_scores = _create_local('v_scores', shape=[0, ]) v_tp = _create_local('v_tp', shape=[0, ], dtype=stype) v_fp = _create_local('v_fp', shape=[0, ], dtype=stype) # Update operations. nobjects_op = state_ops.assign_add(v_nobjects, tf.reduce_sum(num_gbboxes)) ndetections_op = state_ops.assign_add(v_ndetections, tf.size(scores, out_type=tf.int32)) scores_op = state_ops.assign(v_scores, tf.concat([v_scores, scores], axis=0), validate_shape=False) tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp], axis=0), validate_shape=False) fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp], axis=0), validate_shape=False) # Value and update ops. val = (v_nobjects, v_ndetections, v_tp, v_fp, v_scores) with ops.control_dependencies([nobjects_op, ndetections_op, scores_op, tp_op, fp_op]): update_op = (nobjects_op, ndetections_op, tp_op, fp_op, scores_op) if metrics_collections: ops.add_to_collections(metrics_collections, val) if updates_collections: ops.add_to_collections(updates_collections, update_op) return val, update_op
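The update pattern above (metric variables grown through validate_shape=False assigns of a concat) amounts to appending each batch's masked detections to running arrays. A plain NumPy sketch of that masking and accumulation, with made-up values:

import numpy as np

def append_batch(acc_scores, acc_tp, acc_fp, scores, tp, fp,
                 remove_zero_scores=True, rm_threshold=1e-4):
  # Drop entries that are neither TP nor FP, optionally drop near-zero
  # scores, then append what remains to the running arrays.
  mask = np.logical_or(tp, fp)
  if remove_zero_scores:
    mask = np.logical_and(mask, scores > rm_threshold)
  return (np.concatenate([acc_scores, scores[mask]]),
          np.concatenate([acc_tp, tp[mask]]),
          np.concatenate([acc_fp, fp[mask]]))

# Running state starts empty; one hypothetical batch of detections follows.
acc = (np.zeros([0]), np.zeros([0], dtype=bool), np.zeros([0], dtype=bool))
acc = append_batch(*acc,
                   scores=np.array([0.9, 0.0, 0.4]),
                   tp=np.array([True, False, False]),
                   fp=np.array([False, False, True]))
print(acc)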