def call(self, inputs):
    """Look up `inputs` in the vocabulary and encode them per `output_mode`.

    Dense, sparse and ragged inputs are handled; a scalar input is
    temporarily upranked to rank 1 for the lookup.

    Args:
        inputs: Values to look up. They are first passed through
            `self._standardize_inputs` with `self._key_dtype`.

    Returns:
        The looked-up values when `output_mode` is `INT` (squeezed back to a
        scalar if the input was one); otherwise the result of
        `utils.encode_categorical_inputs`.
    """
    self._maybe_freeze_vocab_size()

    inputs = self._standardize_inputs(inputs, self._key_dtype)
    original_shape = inputs.shape
    scalar_input = original_shape.rank == 0
    # Some ops cannot handle scalars, so work on a rank-1 view instead.
    if scalar_input:
        inputs = self._expand_dims(inputs, -1)

    if tf_utils.is_sparse(inputs):
        # Only the values are looked up; the sparsity pattern is kept as is.
        lookups = tf.SparseTensor(
            inputs.indices,
            self._lookup_dense(inputs.values),
            inputs.dense_shape)
    elif tf_utils.is_ragged(inputs):
        lookups = tf.ragged.map_flat_values(self._lookup_dense, inputs)
    else:
        lookups = self._lookup_dense(inputs)

    if self.output_mode == INT:
        # Undo the uprank applied above for scalar input.
        return tf.squeeze(lookups, -1) if scalar_input else lookups

    if self.pad_to_max_tokens:
        depth = self.max_tokens
    else:
        depth = self._frozen_vocab_size

    # IDF weights are only supplied in TF-IDF mode.
    if self.output_mode == TF_IDF:
        idf_weights = self.idf_weights_const
    else:
        idf_weights = None

    return utils.encode_categorical_inputs(
        lookups,
        output_mode=self.output_mode,
        depth=depth,
        dtype=self.compute_dtype,
        sparse=self.sparse,
        idf_weights=idf_weights)
def call(self, inputs):
    """Bucketize `inputs` against `self.bins`.

    Args:
        inputs: A ragged, sparse or dense tensor. Values are cast to
            `tf.float32` before bucketizing.

    Returns:
        A tensor of the same kind as `inputs` (ragged, sparse or dense)
        holding the bucket produced by `BoostedTreesBucketize` for each
        value.

    Raises:
        NotImplementedError: If `inputs` is dense and any non-batch
            dimension of its static shape is unknown.
    """
    bins = [tf.cast(tf.compat.v1.squeeze(self.bins), tf.float32)]

    def _bucketize_fn(values):
        return tf.raw_ops.BoostedTreesBucketize(
            float_values=[tf.cast(values, tf.float32)],
            bucket_boundaries=bins)[0]

    if tf_utils.is_ragged(inputs):
        integer_buckets = tf.ragged.map_flat_values(_bucketize_fn, inputs)
        # Ragged map_flat_values doesn't touch the non-values tensors in the
        # ragged composite tensor. If this op is the only op in a Keras
        # model, this can cause errors in Graph mode, so wrap the tensor in
        # an identity.
        return tf.identity(integer_buckets)

    if isinstance(inputs, tf.SparseTensor):
        return tf.SparseTensor(
            indices=tf.identity(inputs.indices),
            values=_bucketize_fn(inputs.values),
            dense_shape=tf.identity(inputs.dense_shape))

    static_shape = inputs.get_shape()
    if any(dim is None for dim in static_shape.as_list()[1:]):
        # The two literals are concatenated; the space after the comma keeps
        # the message readable ("shape, found ...").
        raise NotImplementedError(
            "Discretization Layer requires known non-batch shape, "
            "found {}".format(static_shape))

    dynamic_shape = tf.shape(inputs)
    # BoostedTreesBucketize only handles rank 1 inputs. We need to flatten
    # our inputs after batch size and vectorized_map over each sample.
    reshaped = tf.reshape(inputs, [dynamic_shape[0], -1])
    return tf.reshape(
        tf.vectorized_map(_bucketize_fn, reshaped), dynamic_shape)
def update_state(self, data):
    """Accumulate token statistics from one batch of `data`.

    Args:
        data: A batch of tokens. It is standardized to
            `self.vocabulary_dtype` and upranked so that it has at least
            two dimensions.

    Raises:
        ValueError: If the layer already has a static input vocabulary.
    """
    if self._has_input_vocabulary:
        raise ValueError(
            "Cannot adapt {} layer after setting a static vocabulary via init "
            "argument or `set_vocabulary`.".format(
                self.__class__.__name__))

    data = self._standardize_inputs(data, self.vocabulary_dtype)
    # A scalar becomes rank 1 here and then rank 2 in the next step.
    if data.shape.rank == 0:
        data = tf.expand_dims(data, 0)
    # A 1-d tensor is treated as a single document (needed for tf-idf).
    if data.shape.rank == 1:
        data = tf.expand_dims(data, 0)

    seen_tokens, batch_counts = self._num_tokens(data)
    running_counts = batch_counts + self.token_counts.lookup(seen_tokens)
    self.token_counts.insert(seen_tokens, running_counts)

    if self.output_mode != TF_IDF:
        return

    # Dedupe each row so a token counts at most once per document.
    deduped_doc_data = tf.map_fn(lambda row: tf.unique(row)[0], data)
    # Flatten and count tokens.
    doc_tokens, doc_counts = self._num_tokens(deduped_doc_data)
    running_doc_counts = (
        doc_counts + self.token_document_counts.lookup(doc_tokens))
    self.token_document_counts.insert(doc_tokens, running_doc_counts)

    if tf_utils.is_ragged(data):
        batch_documents = data.nrows()
    else:
        batch_documents = tf.shape(data, out_type=tf.int64)[0]
    self.num_documents.assign_add(batch_documents)
def convert_to_list(values, sparse_default_value=None):
    """Turn a tensor-like, composite tensor or ndarray into a Python list.

    Args:
        values: The value to convert.
        sparse_default_value: Fill value used when densifying a sparse
            input. When `None`, `''` is used for string values and `-1`
            otherwise.

    Returns:
        A Python list when the input could be reduced to one; otherwise the
        (possibly partially converted) value unchanged.
    """
    if tf_utils.is_ragged(values):
        # Corner case: an actual RaggedTensor (not a RaggedTensorValue)
        # passed in non-eager mode cannot have to_list() called on it until
        # it is evaluated, and backend.get_value() won't evaluate composite
        # tensors because Keras doesn't fully support them yet.
        # TODO(momernick): Get Keras to recognize composite tensors as
        # Tensors and then replace this with a call to backend.get_value.
        needs_session_run = (
            isinstance(values, tf.RaggedTensor)
            and not tf.executing_eagerly())
        if needs_session_run:
            values = backend.get_session(values).run(values)
        values = values.to_list()

    sparse_types = (tf.SparseTensor, tf.compat.v1.SparseTensorValue)
    if isinstance(values, sparse_types):
        if sparse_default_value is None:
            is_string = tf.as_dtype(values.values.dtype) == tf.string
            sparse_default_value = '' if is_string else -1
        densified = tf.sparse.to_dense(
            values, default_value=sparse_default_value)
        values = backend.get_value(densified)

    if isinstance(values, tf.Tensor):
        values = backend.get_value(values)

    # Either the caller gave us an ndarray or one of the steps above
    # produced one; in both cases normalize to a plain Python list.
    if isinstance(values, np.ndarray):
        values = values.tolist()

    return values
def call(self, inputs):
    """Preprocess `inputs`, look them up, and shape the integer output.

    Args:
        inputs: A tensor, or a list/tuple/ndarray that is converted to one.

    Returns:
        The preprocessed inputs when `self._output_mode` is `None`; the raw
        lookup result for non-`INT` modes; otherwise the lookup result
        trimmed and padded according to `self._output_sequence_length`.
    """
    if isinstance(inputs, (list, tuple, np.ndarray)):
        inputs = tf.convert_to_tensor(inputs)

    inputs = self._preprocess(inputs)

    # No output processing requested: hand back the preprocessed input.
    if self._output_mode is None:
        return inputs

    lookup_data = self._index_lookup_layer(inputs)
    if self._output_mode != INT:
        return lookup_data

    target_length = self._output_sequence_length
    # Slicing with a `None` bound is a no-op, so this only trims when a
    # target length is configured.
    trimmed = lookup_data[..., :target_length]
    target_shape = trimmed.shape.as_list()
    target_shape[-1] = target_length

    # Ragged output is densified directly into the target shape.
    if tf_utils.is_ragged(trimmed):
        return trimmed.to_tensor(default_value=0, shape=target_shape)

    if target_length is None:
        return trimmed

    padding, _ = tf.required_space_to_batch_paddings(
        trimmed.shape, target_shape)
    return tf.compat.v1.pad(trimmed, padding)
def _num_tokens(self, data):
    """Return the unique tokens in `data` and how often each occurs.

    Args:
        data: A sparse, ragged or dense tensor of tokens.

    Returns:
        A `(tokens, counts)` pair taken from `tf.unique_with_counts` applied
        to the flattened values; `counts` is produced with
        `out_idx=tf.int64`.
    """
    if tf_utils.is_sparse(data):
        flattened = data.values
    elif tf_utils.is_ragged(data):
        flattened = data.flat_values
    else:
        flattened = tf.reshape(data, [-1])

    # The middle element (per-value index) is not needed here.
    unique_tokens, _, occurrences = tf.unique_with_counts(
        flattened, out_idx=tf.int64)
    return unique_tokens, occurrences
def call(self, inputs):
    """Look up `inputs` and encode the result according to `output_mode`.

    Args:
        inputs: Values to look up. They are first passed through
            `self._standardize_inputs` with `self._key_dtype`.

    Returns:
        The looked-up values for `INT` mode (squeezed back to a scalar for
        scalar input); otherwise bin counts from `category_encoding`,
        multiplied by `self.idf_weights_const` in `TF_IDF` mode.

    Raises:
        ValueError: If a non-`INT` mode would produce an output of rank
            greater than 2.
    """
    self._maybe_freeze_vocab_size()

    inputs = self._standardize_inputs(inputs, self._key_dtype)
    original_shape = inputs.shape
    scalar_input = original_shape.rank == 0
    # Some ops cannot handle scalars, so work on a rank-1 view instead.
    if scalar_input:
        inputs = self._expand_dims(inputs, -1)

    if tf_utils.is_sparse(inputs):
        lookups = tf.SparseTensor(
            inputs.indices,
            self._lookup_dense(inputs.values),
            inputs.dense_shape)
    elif tf_utils.is_ragged(inputs):
        lookups = tf.ragged.map_flat_values(self._lookup_dense, inputs)
    else:
        lookups = self._lookup_dense(inputs)

    if self.output_mode == INT:
        # Undo the uprank applied above for scalar input.
        return tf.squeeze(lookups, -1) if scalar_input else lookups

    # One hot upranks only if the final dimension is not already 1.
    if self.output_mode == ONE_HOT and lookups.shape[-1] != 1:
        lookups = self._expand_dims(lookups, -1)

    # TODO(b/190445202): remove output rank restriction.
    if lookups.shape.rank > 2:
        raise ValueError(
            "Received input shape {}, which would result in output rank {}. "
            "Currently only outputs up to rank 2 are supported for "
            "`output_mode={}`.".format(original_shape, lookups.shape.rank,
                                       self.output_mode))

    binary_output = self.output_mode in (MULTI_HOT, ONE_HOT)
    out_depth = (
        self.max_tokens if self.pad_to_max_tokens
        else self._frozen_vocab_size)

    if self.sparse:
        bincount_fn = category_encoding.sparse_bincount
    else:
        bincount_fn = category_encoding.dense_bincount
    bincounts = bincount_fn(lookups, out_depth, binary_output)

    if self.output_mode == TF_IDF:
        return tf.multiply(bincounts, self.idf_weights_const)
    return bincounts
def call(self, inputs):
    """Look up `inputs` in the table and encode them per `output_mode`.

    Args:
        inputs: A dense, sparse or ragged tensor of keys. `int32` input is
            cast to `int64` when the key dtype is `int64`.

    Returns:
        The lookup result for `INT` mode; otherwise bin counts from
        `category_encoding`, multiplied by `self.tf_idf_weights` in
        `TF_IDF` mode.

    Raises:
        ValueError: If neither `max_tokens` nor a vocabulary size is set.
    """
    if not self.max_tokens and self._vocab_size is None:
        raise ValueError(
            "You must set the layer's vocabulary before calling it. "
            "Either pass a `vocabulary` argument to the layer, or "
            "call `layer.adapt(dataset)` with some sample data.")
    self._called = True

    if self._key_dtype == tf.int64 and inputs.dtype == tf.int32:
        inputs = tf.cast(inputs, tf.int64)
    lookup_result = self._table_handler.lookup(inputs)

    lookup_checks = []
    if self.num_oov_indices == 0 and not self.invert:
        # With no OOV slots, assert at run time that nothing mapped to -1.
        if tf_utils.is_sparse(inputs):
            found, queried = lookup_result.values, inputs.values
        elif tf_utils.is_ragged(inputs):
            found, queried = lookup_result.flat_values, inputs.flat_values
        else:
            found, queried = lookup_result, inputs
        oov_indices = tf.where(tf.equal(found, -1))
        oov_inputs = tf.compat.v1.gather_nd(queried, oov_indices)
        msg = tf.strings.format(
            "When `num_oov_indices=0` all inputs should be in vocabulary, "
            "found OOV values {}, consider setting `num_oov_indices=1`.",
            (oov_inputs, ))
        no_oov = tf.equal(tf.compat.v1.size(oov_indices), 0)
        lookup_checks.append(tf.Assert(no_oov, [msg]))

    with tf.control_dependencies(lookup_checks):
        if self.output_mode == INT:
            return tf.identity(lookup_result)

        multi_hot_output = self.output_mode == MULTI_HOT
        use_vocab_size = self._vocab_size and not self.pad_to_max_tokens
        out_depth = self._vocab_size if use_vocab_size else self.max_tokens

        if self.sparse:
            bincount_fn = category_encoding.sparse_bincount
        else:
            bincount_fn = category_encoding.dense_bincount
        bincounts = bincount_fn(lookup_result, out_depth, multi_hot_output)

        if self.output_mode == TF_IDF:
            return tf.multiply(bincounts, self.tf_idf_weights)
        return bincounts
def _preprocess(self, inputs):
    """Standardize, split and n-gram `inputs` per the layer configuration.

    Args:
        inputs: A dense or ragged string tensor.

    Returns:
        The processed tensor. Depending on the configured split it may be
        ragged or dense.

    Raises:
        ValueError: If `self._standardize` or `self._split` is set to an
            unsupported value.
    """
    if self._standardize == LOWER_AND_STRIP_PUNCTUATION:
        if tf_utils.is_ragged(inputs):
            lowercase_inputs = tf.ragged.map_flat_values(
                tf.strings.lower, inputs)
            # Depending on configuration, we may never touch the non-data
            # tensor in the ragged inputs tensor. If that is the case, and
            # this is the only layer in the keras model, running it will
            # throw an error. To get around this, we wrap the result in an
            # identity.
            lowercase_inputs = tf.identity(lowercase_inputs)
        else:
            lowercase_inputs = tf.strings.lower(inputs)
        inputs = tf.strings.regex_replace(lowercase_inputs,
                                          DEFAULT_STRIP_REGEX, "")
    elif callable(self._standardize):
        inputs = self._standardize(inputs)
    elif self._standardize is not None:
        # The 1-tuple keeps `%` formatting safe when the bad value is
        # itself a tuple.
        raise ValueError(
            ("%s is not a supported standardization. "
             "TextVectorization supports the following options "
             "for `standardize`: None, "
             "'lower_and_strip_punctuation', or a "
             "Callable.") % (self._standardize,))

    if self._split is not None:
        # If we are splitting, a trailing axis of dimension 1 is squeezed
        # out first. We do this here instead of after splitting for
        # performance reasons - it's more expensive to squeeze a ragged
        # tensor.
        if inputs.shape.ndims > 1:
            inputs = tf.compat.v1.squeeze(inputs, axis=-1)
        if self._split == SPLIT_ON_WHITESPACE:
            # This treats multiple whitespaces as one whitespace, and
            # strips leading and trailing whitespace.
            inputs = tf.strings.split(inputs)
        elif callable(self._split):
            inputs = self._split(inputs)
        else:
            raise ValueError(
                ("%s is not a supported splitting. "
                 "TextVectorization supports the following options "
                 "for `split`: None, 'whitespace', or a Callable.")
                % (self._split,))

    # Note that 'inputs' here can be either ragged or dense depending on the
    # configuration choices for this Layer. The strings.ngrams op, however,
    # does support both ragged and dense inputs.
    if self._ngrams is not None:
        inputs = tf.strings.ngrams(
            inputs, ngram_width=self._ngrams, separator=" ")

    return inputs
def lookup(self, inputs):
    """Run a table lookup, dispatching on the kind of `inputs`.

    Args:
        inputs: A sparse tensor (or sparse tensor value), or anything that
            `convert_to_tensor_or_ragged_tensor` accepts.

    Returns:
        The result of the sparse, ragged or dense lookup helper.
    """
    # Sparse tensors don't play nicely with tensor conversion, so they are
    # dispatched before any attempt to convert lists or arrays.
    sparse_types = (tf.SparseTensor, tf.compat.v1.SparseTensorValue)
    if isinstance(inputs, sparse_types):
        return self._sparse_lookup(inputs)

    # Lists/arrays become Tensors or RaggedTensors here.
    converted = ragged_tensor.convert_to_tensor_or_ragged_tensor(inputs)

    if tf_utils.is_ragged(converted):
        return self._ragged_lookup(converted)
    return self._tensor_lookup(converted)
def compute(self, values, accumulator=None):
    """Compute a step in this computation, returning a new accumulator.

    Args:
        values: The data for this step. For a `tf.SparseTensor` only its
            `values` are used; for a ragged tensor only its `flat_values`.
        accumulator: An accumulator from a previous step, or `None` for the
            first step.

    Returns:
        A new accumulator built from the summaries of `values`, merged with
        the summaries of `accumulator` when one is given.
    """
    if isinstance(values, tf.SparseTensor):
        values = values.values
    if tf_utils.is_ragged(values):
        values = values.flat_values
    # The shape is passed positionally: the `newshape` keyword is deprecated
    # in recent NumPy releases, while the positional form works everywhere.
    flattened_input = np.reshape(values, (-1, 1))

    summaries = [summarize(v, self.epsilon) for v in flattened_input.T]

    if accumulator is None:
        return self._create_accumulator(summaries)

    merged = [
        merge_summaries(prev_summ, summ, self.epsilon)
        for prev_summ, summ in zip(accumulator.summaries, summaries)
    ]
    return self._create_accumulator(merged)
def call(self, inputs):
    """Map each value of `inputs` to a bucket using `self.bin_boundaries`.

    Args:
        inputs: A ragged, sparse or dense tensor.

    Returns:
        A tensor of the same kind as `inputs` holding the `Bucketize`
        output for each value.
    """

    def _to_bucket(values):
        return tf.raw_ops.Bucketize(input=values,
                                    boundaries=self.bin_boundaries)

    if tf_utils.is_ragged(inputs):
        # map_flat_values leaves the non-values tensors of the ragged
        # composite untouched. If this is the only op in a Keras model that
        # can cause errors in Graph mode, hence the identity wrapper.
        return tf.identity(tf.ragged.map_flat_values(_to_bucket, inputs))

    if tf_utils.is_sparse(inputs):
        return tf.SparseTensor(
            indices=tf.identity(inputs.indices),
            values=_to_bucket(inputs.values),
            dense_shape=tf.identity(inputs.dense_shape))

    return _to_bucket(inputs)
def call(self, inputs):
    """Look up `inputs` in the table and encode them per `output_mode`.

    Args:
        inputs: A tensor, or a list/tuple/ndarray that is converted to one.
            `int32` input is cast to `int64` when the key dtype is `int64`.

    Returns:
        The lookup result for `INT` mode, otherwise the result of
        `self._encode_output`.

    Raises:
        ValueError: If neither `max_tokens` nor a vocabulary size is set.
    """
    if isinstance(inputs, (list, tuple, np.ndarray)):
        inputs = tf.convert_to_tensor(inputs)

    if not self.max_tokens and self._vocab_size is None:
        raise ValueError(
            "You must set the layer's vocabulary before calling it. "
            "Either pass a `vocabulary` argument to the layer, or "
            "call `layer.adapt(dataset)` with some sample data.")
    self._called = True

    if self._key_dtype == tf.int64 and inputs.dtype == tf.int32:
        inputs = tf.cast(inputs, tf.int64)
    lookup_result = self._table_handler.lookup(inputs)

    lookup_checks = []
    if self.num_oov_indices == 0 and not self.invert:
        # With no OOV slots, assert at run time that nothing mapped to -1.
        if tf_utils.is_sparse(inputs):
            found, queried = lookup_result.values, inputs.values
        elif tf_utils.is_ragged(inputs):
            found, queried = lookup_result.flat_values, inputs.flat_values
        else:
            found, queried = lookup_result, inputs

        # tf.where needs rank > 0.
        if queried.shape.rank == 0:
            queried = self._expand_dims(queried, -1)
            found = self._expand_dims(found, -1)

        oov_indices = tf.where(tf.equal(found, -1))
        oov_inputs = tf.compat.v1.gather_nd(queried, oov_indices)
        msg = tf.strings.format(
            "When `num_oov_indices=0` all inputs should be in vocabulary, "
            "found OOV values {}, consider setting `num_oov_indices=1`.",
            (oov_inputs, ))
        no_oov = tf.equal(tf.compat.v1.size(oov_indices), 0)
        lookup_checks.append(tf.Assert(no_oov, [msg]))

    with tf.control_dependencies(lookup_checks):
        if self.output_mode != INT:
            return self._encode_output(lookup_result)
        return tf.identity(lookup_result)
def call(self, inputs):
    """Preprocess `inputs`, look them up, and pad/trim integer output.

    Args:
        inputs: A tensor, or a list/tuple/ndarray that is converted to one.

    Returns:
        The preprocessed inputs when `self._output_mode` is `None`; the raw
        lookup result for non-`INT` modes or when `self._ragged` is set;
        otherwise a dense tensor padded/trimmed according to
        `self._output_sequence_length`.
    """
    if isinstance(inputs, (list, tuple, np.ndarray)):
        inputs = tf.convert_to_tensor(inputs)

    inputs = self._preprocess(inputs)

    # No output processing requested: hand back the preprocessed input.
    if self._output_mode is None:
        return inputs

    lookup_data = self._lookup_layer(inputs)

    # Non-int output comes straight from the underlying layer, and so does
    # int output when ragged results were requested.
    if self._output_mode != INT or self._ragged:
        return lookup_data

    target_length = self._output_sequence_length

    # A ragged tensor can be padded while it is converted to dense.
    if tf_utils.is_ragged(lookup_data):
        dense_shape = lookup_data.shape.as_list()
        # With a `None` length, to_tensor pads the last dimension to the
        # bounding shape of the ragged dimension.
        dense_shape[-1] = target_length
        return lookup_data.to_tensor(default_value=0, shape=dense_shape)

    if target_length is None:
        return lookup_data

    # Dense tensor: trim first, then pad.
    lookup_data = lookup_data[..., :target_length]
    # The dynamic shape is used because required_space_to_batch_paddings
    # requires a fully known shape.
    current_shape = tf.shape(lookup_data)
    padded_shape = tf.concat((current_shape[:-1], [target_length]), 0)
    padding, _ = tf.required_space_to_batch_paddings(
        current_shape, padded_shape)
    return tf.pad(lookup_data, padding)
def lookup(self, inputs):
    """Run a table lookup, dispatching on the kind of `inputs`.

    Args:
        inputs: A sparse tensor (or value), a ragged tensor (or value), or
            anything `tf.convert_to_tensor` accepts.

    Returns:
        The result of the sparse, ragged or dense lookup helper.
    """
    # Sparse tensors don't play nicely with tensor conversion, so they are
    # dispatched before any attempt to convert lists or arrays.
    sparse_types = (tf.SparseTensor, tf.compat.v1.SparseTensorValue)
    if isinstance(inputs, sparse_types):
        return self._sparse_lookup(inputs)

    if not tf_utils.is_ragged(inputs):
        # Plain tensor-like input.
        return self._tensor_lookup(tf.convert_to_tensor(inputs))

    if isinstance(inputs, tf.compat.v1.ragged.RaggedTensorValue):
        # Rebuild a RaggedTensor from the value's flat values and splits.
        flat_values = tf.convert_to_tensor(value=inputs.flat_values,
                                           name="flat_values")
        inputs = tf.RaggedTensor.from_nested_row_splits(
            flat_values, inputs.nested_row_splits, validate=False)
    return self._ragged_lookup(inputs)
def call(self, inputs):
    """Preprocess `inputs`, index them, and produce the configured output.

    Args:
        inputs: A tensor, or a list/tuple/ndarray that is converted to one.

    Returns:
        The preprocessed inputs when `self._output_mode` is `None`; for
        `INT` mode a dense tensor, padded or sliced along axis 1 to
        `self._output_sequence_length` when that is set; otherwise the
        output of `self._vectorize_layer`.
    """
    if isinstance(inputs, (list, tuple, np.ndarray)):
        inputs = tf.convert_to_tensor(inputs)

    self._called = True
    inputs = self._preprocess(inputs)

    # No output processing requested: hand back the preprocessed input.
    if self._output_mode is None:
        return inputs

    indexed_data = self._index_lookup_layer(inputs)

    if self._output_mode != INT:
        # Non-integer output is produced by the vectorization layer.
        return self._vectorize_layer(indexed_data)

    # Integer output is dense; ragged data is densified first.
    if tf_utils.is_ragged(indexed_data):
        dense_data = indexed_data.to_tensor(default_value=0)
    else:
        dense_data = indexed_data

    if self._output_sequence_length is None:
        return dense_data

    # With a fixed output length, pad or slice is chosen dynamically from
    # the run-time sequence length.
    sequence_len = K.shape(dense_data)[1]
    pad_amt = self._output_sequence_length - sequence_len

    def _pad():
        return tf.compat.v1.pad(dense_data, [[0, 0], [0, pad_amt]])

    def _slice():
        return dense_data[:, :self._output_sequence_length]

    output_tensor = tf.compat.v1.cond(
        sequence_len < self._output_sequence_length,
        true_fn=_pad,
        false_fn=_slice)

    # Record the now-known static size of the last dimension.
    static_shape = output_tensor.shape.as_list()
    static_shape[-1] = self._output_sequence_length
    output_tensor.set_shape(tf.TensorShape(static_shape))
    return output_tensor
def call(self, inputs):
    """Map each value of `inputs` to an `int64` bucket.

    Args:
        inputs: A ragged, sparse or dense tensor.

    Returns:
        A tensor of the same kind as `inputs` holding the `Bucketize`
        output for each value, cast to `tf.int64`.
    """

    def _to_bucket(values):
        raw_buckets = tf.raw_ops.Bucketize(input=values,
                                           boundaries=self.bin_boundaries)
        # Other preprocessing layers emit int64 for int output, so we match
        # them; the underlying op only supports int32, hence the cast.
        return tf.cast(raw_buckets, tf.int64)

    if tf_utils.is_ragged(inputs):
        # map_flat_values leaves the non-values tensors of the ragged
        # composite untouched. If this is the only op in a Keras model that
        # can cause errors in Graph mode, hence the identity wrapper.
        return tf.identity(tf.ragged.map_flat_values(_to_bucket, inputs))

    if tf_utils.is_sparse(inputs):
        return tf.SparseTensor(
            indices=tf.identity(inputs.indices),
            values=_to_bucket(inputs.values),
            dense_shape=tf.identity(inputs.dense_shape))

    return _to_bucket(inputs)
def call(self, inputs):
    """Bucketize `inputs` and encode the buckets per `output_mode`.

    Args:
        inputs: A ragged, sparse or dense tensor.

    Returns:
        The result of `utils.encode_categorical_inputs` applied to the
        bucket indices, with depth `len(self.bin_boundaries) + 1`.
    """

    def _to_bucket(values):
        return tf.raw_ops.Bucketize(input=values,
                                    boundaries=self.bin_boundaries)

    if tf_utils.is_ragged(inputs):
        bucket_indices = tf.ragged.map_flat_values(_to_bucket, inputs)
    elif tf_utils.is_sparse(inputs):
        bucket_indices = tf.SparseTensor(
            indices=tf.identity(inputs.indices),
            values=_to_bucket(inputs.values),
            dense_shape=tf.identity(inputs.dense_shape))
    else:
        bucket_indices = _to_bucket(inputs)

    # N boundaries delimit N + 1 buckets.
    num_buckets = len(self.bin_boundaries) + 1
    return utils.encode_categorical_inputs(
        bucket_indices,
        output_mode=self.output_mode,
        depth=num_buckets,
        sparse=self.sparse,
        dtype=self.compute_dtype)
def call(self, inputs):
    """Cross every combination of the inputs at each configured depth.

    Args:
        inputs: An iterable of tensors; each is passed through
            `self._preprocess_input` first.

    Returns:
        The partial crossings concatenated along axis 1. The result is a
        sparse concat when no input is ragged and at least one is a
        `tf.SparseTensor`.

    Raises:
        ValueError: If a requested depth exceeds the number of inputs.
    """
    inputs = [self._preprocess_input(inp) for inp in inputs]
    # Without an explicit depth, cross all inputs together once.
    depth_tuple = self._depth_tuple if self.depth else (len(inputs), )

    # Ragged takes precedence: sparse output is only used if no input is
    # ragged.
    ragged_out = any(tf_utils.is_ragged(inp) for inp in inputs)
    sparse_out = (
        not ragged_out
        and any(isinstance(inp, tf.SparseTensor) for inp in inputs))

    outputs = []
    for depth in depth_tuple:
        if len(inputs) < depth:
            raise ValueError(
                f'Number of inputs cannot be less than depth. Received '
                f'{len(inputs)} input tensors, and depth {depth}.')
        outputs.extend(
            self.partial_crossing(partial_inps, ragged_out, sparse_out)
            for partial_inps in itertools.combinations(inputs, depth))

    if sparse_out:
        return tf.sparse.concat(axis=1, sp_inputs=outputs)
    return tf.concat(outputs, axis=1)
def call(self, inputs):
    """Bucketize `inputs` against `self.bins`.

    Args:
        inputs: A ragged, sparse or dense tensor. Values are cast to
            `tf.float32` before bucketizing.

    Returns:
        A tensor of the same kind as `inputs` (ragged, sparse or dense)
        holding the bucket produced by `BoostedTreesBucketize` for each
        value.

    Raises:
        NotImplementedError: If `inputs` is dense and any non-batch
            dimension of its static shape is unknown.
    """

    def _bucketize_op(bins):
        """Return a function bucketizing its argument against `bins`."""
        bins = [tf.cast(bins, tf.float32)]

        def _bucketize(values):
            return tf.raw_ops.BoostedTreesBucketize(
                float_values=[tf.cast(values, tf.float32)],
                bucket_boundaries=bins)[0]

        return _bucketize

    if tf_utils.is_ragged(inputs):
        integer_buckets = tf.ragged.map_flat_values(
            _bucketize_op(tf.compat.v1.squeeze(self.bins)), inputs)
        # Ragged map_flat_values doesn't touch the non-values tensors in the
        # ragged composite tensor. If this op is the only op in a Keras
        # model, this can cause errors in Graph mode, so wrap the tensor in
        # an identity.
        return tf.identity(integer_buckets)

    if isinstance(inputs, tf.SparseTensor):
        # Same op as the other branches, applied to the sparse values only.
        bucketize = _bucketize_op(tf.compat.v1.squeeze(self.bins))
        integer_buckets = bucketize(inputs.values)
        return tf.SparseTensor(indices=tf.identity(inputs.indices),
                               values=integer_buckets,
                               dense_shape=tf.identity(inputs.dense_shape))

    input_shape = inputs.get_shape()
    non_batch_dims = input_shape.as_list()[1:]
    if any(dim is None for dim in non_batch_dims):
        # The two literals are concatenated; the space after the comma keeps
        # the message readable ("shape, found ...").
        raise NotImplementedError(
            "Discretization Layer requires known non-batch shape, "
            "found {}".format(input_shape))

    # Flatten everything after the batch dimension, bucketize each sample
    # with vectorized_map, then restore the original non-batch shape.
    reshaped = tf.reshape(
        inputs,
        [-1, tf.raw_ops.Prod(input=non_batch_dims, axis=0)])
    return tf.reshape(
        tf.vectorized_map(
            _bucketize_op(tf.compat.v1.squeeze(self.bins)), reshaped),
        tf.constant([-1] + non_batch_dims))
def _process_single_input(self, inputs):
    """Hash one input tensor into `self.num_bins` buckets.

    Args:
        inputs: A dense, sparse or ragged tensor. Integer values are
            converted to strings before hashing.

    Returns:
        A tensor of the same kind as `inputs` with each value replaced by
        the output of the hashing function for `self.num_bins` buckets.
    """
    # The hashing function works on strings, so integers are stringified.
    if inputs.dtype.is_integer:
        if isinstance(inputs, tf.SparseTensor):
            inputs = tf.SparseTensor(
                indices=inputs.indices,
                values=tf.as_string(inputs.values),
                dense_shape=inputs.dense_shape)
        else:
            inputs = tf.as_string(inputs)

    hash_fn = self._get_string_to_hash_bucket_fn()

    if tf_utils.is_ragged(inputs):
        return tf.ragged.map_flat_values(
            hash_fn, inputs, num_buckets=self.num_bins, name='hash')

    if isinstance(inputs, tf.SparseTensor):
        # Only the values are hashed; the sparsity pattern is kept as is.
        hashed_values = hash_fn(inputs.values, self.num_bins, name='hash')
        return tf.SparseTensor(
            indices=inputs.indices,
            values=hashed_values,
            dense_shape=inputs.dense_shape)

    return hash_fn(inputs, self.num_bins, name='hash')
def _create_keras_history_helper(tensors, processed_ops, created_layers):
    """Helper method for `create_keras_history`.

    Args:
        tensors: A structure of Tensors for which to create Keras metadata.
        processed_ops: Set. TensorFlow operations that have already been
            wrapped in `TensorFlowOpLayer` instances.
        created_layers: List. The `TensorFlowOpLayer` instances created.

    Returns:
        Tuple. First element is the updated set of TensorFlow Operations
        that have been wrapped in `TensorFlowOpLayer` instances. Second
        element is a list of the `TensorFlowOpLayer` instances created.

    Raises:
        ValueError: If called while executing eagerly outside functions, or
            if any tensor without Keras history is sparse or ragged.
    """
    if tf.compat.v1.executing_eagerly_outside_functions():
        raise ValueError(
            '`create_keras_history` should only be called if eager is disabled!'
        )
    # Import of `base_layer` needed in order to create `TensorFlowOpLayer`.
    # Cannot be imported at top because of circular dependencies.
    # TODO(omalleyt): Resolve circular dependency.
    from keras.engine import base_layer  # pylint: disable=g-import-not-at-top

    tensor_list = tf.nest.flatten(tensors)
    # Collected rather than raised immediately so the final error can list
    # every offending tensor at once.
    sparse_ops = []
    ragged_tensors = []
    for tensor in tensor_list:
        if getattr(tensor, '_keras_history', None) is not None:
            continue
        if isinstance(tensor,
                      (tf.SparseTensor, tf.compat.v1.SparseTensorValue)):
            sparse_ops.append(tensor.op)
            continue
        if tf_utils.is_ragged(tensor):
            # Ragged tensors don't have an op property
            ragged_tensors.append(tensor)
            continue
        op = tensor.op  # The Op that created this Tensor.
        if op not in processed_ops:
            # Recursively set `_keras_history`.
            op_inputs = list(op.inputs)
            constants = {}
            layer_inputs = []
            for i, op_input in enumerate(op_inputs):
                if uses_keras_history(op_input):
                    layer_inputs.append(op_input)
                else:
                    # Treat any value not originating from a `keras.Input`
                    # as a constant. Variables cannot be supported.
                    ds_with_session = (
                        tf.distribute.in_cross_replica_context() and
                        not tf.compat.v1.executing_eagerly_outside_functions())
                    using_xla = control_flow_util.GraphOrParentsInXlaContext(
                        tf.compat.v1.get_default_graph())
                    if (ds_with_session or using_xla or
                            _UNSAFE_GRAPH_OP_LAYER_CREATION):
                        # In Legacy Graph mode, evaluating here makes
                        # Session be configured improperly. The downside of
                        # this is that saving via `get_config` breaks, but
                        # SavedModel still works.
                        constants[i] = op_input
                    else:
                        with tf.init_scope():
                            constants[i] = backend.function([], op_input)([])
            layer_inputs = unnest_if_single_tensor(layer_inputs)
            processed_ops, created_layers = _create_keras_history_helper(
                layer_inputs, processed_ops, created_layers)
            name = op.name
            node_def = op.node_def.SerializeToString()
            op_layer = base_layer.TensorFlowOpLayer(node_def,
                                                    constants=constants,
                                                    name=name)
            created_layers.append(op_layer)
            op_layer._set_connectivity_metadata(  # pylint: disable=protected-access
                args=(layer_inputs, ),
                kwargs={},
                outputs=op.outputs)
            processed_ops.update([op])
    if sparse_ops or ragged_tensors:
        # Each statement of the example sits on its own line so the snippet
        # shown inside the error message is valid, copyable Python.
        lambda_example = (
            '\n'
            'weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights)\n'
            'output = tf.keras.layers.Lambda(weights_mult)(input)\n')
        raise ValueError(
            'Tensorflow ops that generate ragged or sparse tensor '
            'outputs are currently not supported by Keras automatic '
            'op wrapping. Please wrap these ops in a Lambda layer: '
            '\n\n```\n{example}\n```\n'
            'Sparse ops encountered: {sparse_ops}\n'
            'Ragged tensors encountered: {ragged_tensors}\n'.format(
                example=lambda_example,
                sparse_ops=str(sparse_ops),
                ragged_tensors=str(ragged_tensors)))
    return processed_ops, created_layers
def test_is_ragged_return_true_for_ragged_tensor(self):
    """`is_ragged` must report True for a real `tf.RaggedTensor`."""
    flat_values = [3, 1, 4, 1, 5, 9, 2, 6]
    # Repeated split points (4, 4 and 8, 8) produce empty rows.
    row_splits = [0, 4, 4, 7, 8, 8]
    ragged = tf.RaggedTensor.from_row_splits(
        values=flat_values, row_splits=row_splits)

    self.assertTrue(tf_utils.is_ragged(ragged))
def __call__(self,
             y_true,
             y_pred,
             sample_weight=None,
             regularization_losses=None):
    """Computes the overall loss.

    Args:
        y_true: An arbitrary structure of Tensors representing the ground
            truth.
        y_pred: An arbitrary structure of Tensors representing a Model's
            outputs.
        sample_weight: An arbitrary structure of Tensors representing the
            per-sample loss weights. If one Tensor is passed, it is used for
            all losses. If multiple Tensors are passed, the structure should
            match `y_pred`.
        regularization_losses: Additional losses to be added to the total
            loss.

    Returns:
        The total loss as a single tensor: the sum of the per-output loss
        values and, when given, the regularization losses. A scalar zero
        tensor is returned when no loss values were produced.
    """
    y_true = self._conform_to_outputs(y_pred, y_true)
    sample_weight = self._conform_to_outputs(y_pred, sample_weight)

    if not self._built:
        self.build(y_pred)

    y_pred = tf.nest.flatten(y_pred)
    y_true = tf.nest.flatten(y_true)
    sample_weight = tf.nest.flatten(sample_weight)

    loss_values = []  # Used for gradient calculation.
    loss_metric_values = []  # Used for loss metric calculation.
    batch_dim = None
    per_output = zip(y_true, y_pred, sample_weight, self._losses,
                     self._loss_weights, self._per_output_metrics)
    for y_t, y_p, sw, loss_obj, loss_weight, metric_obj in per_output:
        # Ok to have no loss for an output.
        if y_t is None or loss_obj is None:
            continue

        y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw)
        sw = apply_mask(y_p, sw, get_mask(y_p))
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)

        loss_metric_value = loss_value
        # Correct for the `Mean` loss metrics counting each replica as a
        # batch.
        if loss_obj.reduction == losses_utils.ReductionV2.SUM:
            loss_metric_value *= (
                tf.distribute.get_strategy().num_replicas_in_sync)

        # The batch size is taken from the first output that has a loss.
        if batch_dim is None:
            if tf_utils.is_ragged(y_t):
                batch_dim = y_t.nrows()
            else:
                batch_dim = tf.compat.v1.shape(y_t)[0]

        # The per-output metric sees the loss before `loss_weight` is
        # applied.
        if metric_obj is not None:
            metric_obj.update_state(loss_metric_value,
                                    sample_weight=batch_dim)

        if loss_weight is not None:
            loss_value *= loss_weight
            loss_metric_value *= loss_weight

        reduction = loss_obj.reduction
        if (reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE or
                reduction == losses_utils.ReductionV2.AUTO):
            loss_value = losses_utils.scale_loss_for_distribution(loss_value)

        loss_values.append(loss_value)
        loss_metric_values.append(loss_metric_value)

    if regularization_losses:
        regularization_losses = losses_utils.cast_losses_to_common_dtype(
            regularization_losses)
        reg_loss = tf.add_n(regularization_losses)
        loss_metric_values.append(reg_loss)
        loss_values.append(
            losses_utils.scale_loss_for_distribution(reg_loss))

    # Ok for a model to have no compiled loss.
    if not loss_values:
        return tf.zeros(shape=())

    loss_metric_values = losses_utils.cast_losses_to_common_dtype(
        loss_metric_values)
    total_loss_metric_value = tf.add_n(loss_metric_values)
    self._loss_metric.update_state(total_loss_metric_value,
                                   sample_weight=batch_dim)

    loss_values = losses_utils.cast_losses_to_common_dtype(loss_values)
    return tf.add_n(loss_values)
def test_is_ragged_return_false_for_list(self):
    """`is_ragged` must report False for a plain Python list."""
    plain_list = [1., 2., 3.]

    self.assertFalse(tf_utils.is_ragged(plain_list))