Пример #1
0
    def call(self, inputs):
        """Look up `inputs` and encode the result according to `output_mode`.

        Inputs are first passed through `self._standardize_inputs` using
        `self._key_dtype`. For sparse and ragged inputs only the values are
        looked up, so the surrounding structure is carried over as-is.

        Args:
            inputs: The value(s) to look up. Scalar, dense, sparse and ragged
                inputs are handled.

        Returns:
            For `INT` output mode, the raw lookups (squeezed back to a scalar
            when the input was a scalar). For every other mode, whatever
            `utils.encode_categorical_inputs` returns for the lookups.
        """
        self._maybe_freeze_vocab_size()

        inputs = self._standardize_inputs(inputs, self._key_dtype)
        original_shape = inputs.shape
        scalar_input = original_shape.rank == 0
        if scalar_input:
            # Some ops will not handle scalar input, so work on a rank 1 view.
            inputs = self._expand_dims(inputs, -1)

        if tf_utils.is_sparse(inputs):
            lookups = tf.SparseTensor(
                indices=inputs.indices,
                values=self._lookup_dense(inputs.values),
                dense_shape=inputs.dense_shape)
        elif tf_utils.is_ragged(inputs):
            lookups = tf.ragged.map_flat_values(self._lookup_dense, inputs)
        else:
            lookups = self._lookup_dense(inputs)

        if self.output_mode == INT:
            # Undo the uprank above so scalar in means scalar out.
            return tf.squeeze(lookups, -1) if scalar_input else lookups

        if self.pad_to_max_tokens:
            depth = self.max_tokens
        else:
            depth = self._frozen_vocab_size

        # IDF weights are only passed along for TF-IDF output.
        idf_weights = None
        if self.output_mode == TF_IDF:
            idf_weights = self.idf_weights_const

        return utils.encode_categorical_inputs(
            lookups,
            output_mode=self.output_mode,
            depth=depth,
            dtype=self.compute_dtype,
            sparse=self.sparse,
            idf_weights=idf_weights)
Пример #2
0
 def _num_tokens(self, data):
     """Return the unique values of `data` and how often each one occurs.

     Sparse inputs contribute their `values`, ragged inputs their
     `flat_values`, and anything else is reshaped to one dimension before
     counting.

     Returns:
         A `(tokens, counts)` pair taken from `tf.unique_with_counts`.
     """
     if tf_utils.is_sparse(data):
         flattened = data.values
     elif tf_utils.is_ragged(data):
         flattened = data.flat_values
     else:
         flattened = tf.reshape(data, [-1])

     # The middle result (index of each element into `uniques`) is not needed.
     uniques, _, occurrences = tf.unique_with_counts(
         flattened, out_idx=tf.int64)
     return uniques, occurrences
Пример #3
0
    def call(self, inputs):
        """Look up `inputs` and turn the lookups into the configured output.

        Inputs are first passed through `self._standardize_inputs` using
        `self._key_dtype`. For sparse and ragged inputs only the values are
        looked up, so the surrounding structure is carried over as-is.

        Args:
            inputs: The value(s) to look up. Scalar, dense, sparse and ragged
                inputs are handled.

        Returns:
            For `INT` output mode, the raw lookups (squeezed back to a scalar
            when the input was a scalar). Otherwise bincounts over the
            lookups, multiplied by `self.idf_weights_const` for `TF_IDF`.

        Raises:
            ValueError: If a non-`INT` output mode would produce lookups of
                rank greater than 2.
        """
        self._maybe_freeze_vocab_size()

        inputs = self._standardize_inputs(inputs, self._key_dtype)
        original_shape = inputs.shape
        scalar_input = original_shape.rank == 0
        if scalar_input:
            # Some ops will not handle scalar input, so work on a rank 1 view.
            inputs = self._expand_dims(inputs, -1)

        if tf_utils.is_sparse(inputs):
            lookups = tf.SparseTensor(
                indices=inputs.indices,
                values=self._lookup_dense(inputs.values),
                dense_shape=inputs.dense_shape)
        elif tf_utils.is_ragged(inputs):
            lookups = tf.ragged.map_flat_values(self._lookup_dense, inputs)
        else:
            lookups = self._lookup_dense(inputs)

        if self.output_mode == INT:
            # Undo the uprank above so scalar in means scalar out.
            return tf.squeeze(lookups, -1) if scalar_input else lookups

        # One hot will uprank only if the final output dimension is not
        # already 1.
        if self.output_mode == ONE_HOT and lookups.shape[-1] != 1:
            lookups = self._expand_dims(lookups, -1)

        # TODO(b/190445202): remove output rank restriction.
        output_rank = lookups.shape.rank
        if output_rank > 2:
            raise ValueError(
                "Received input shape {}, which would result in output rank {}. "
                "Currently only outputs up to rank 2 are supported for "
                "`output_mode={}`.".format(original_shape, output_rank,
                                           self.output_mode))

        binary_output = self.output_mode in (MULTI_HOT, ONE_HOT)
        out_depth = (self.max_tokens
                     if self.pad_to_max_tokens else self._frozen_vocab_size)
        bincount_fn = (category_encoding.sparse_bincount
                       if self.sparse else category_encoding.dense_bincount)
        bincounts = bincount_fn(lookups, out_depth, binary_output)

        if self.output_mode == TF_IDF:
            return tf.multiply(bincounts, self.idf_weights_const)
        return bincounts
Пример #4
0
    def call(self, inputs):
        """Look up `inputs` in the table and encode them per `output_mode`.

        When `num_oov_indices` is 0 and the layer is not inverted, an
        assertion is attached to the output that fails if any lookup
        produced -1, reporting the offending input values.

        Args:
            inputs: Dense, sparse or ragged keys to look up. int32 inputs are
                cast to int64 when the layer's key dtype is int64.

        Returns:
            The lookup result for `INT` output mode. Otherwise bincounts of
            the lookup result (binary for `MULTI_HOT`), multiplied by
            `self.tf_idf_weights` for `TF_IDF`.

        Raises:
            ValueError: If neither `max_tokens` nor a vocabulary size is set.
        """
        if not self.max_tokens and self._vocab_size is None:
            raise ValueError(
                "You must set the layer's vocabulary before calling it. "
                "Either pass a `vocabulary` argument to the layer, or "
                "call `layer.adapt(dataset)` with some sample data.")
        self._called = True
        if self._key_dtype == tf.int64 and inputs.dtype == tf.int32:
            inputs = tf.cast(inputs, tf.int64)
        lookup_result = self._table_handler.lookup(inputs)

        lookup_checks = []

        if self.num_oov_indices == 0 and not self.invert:
            # Compare flat values so sparse and ragged inputs are checked the
            # same way as dense ones.
            if tf_utils.is_sparse(inputs):
                lookup_values = lookup_result.values
                input_values = inputs.values
            elif tf_utils.is_ragged(inputs):
                lookup_values = lookup_result.flat_values
                input_values = inputs.flat_values
            else:
                lookup_values = lookup_result
                input_values = inputs
            # tf.where needs rank > 0, so uprank scalar values for the check
            # only; `lookup_result` itself is left untouched.
            if lookup_values.shape.rank == 0:
                input_values = tf.expand_dims(input_values, -1)
                lookup_values = tf.expand_dims(lookup_values, -1)
            # A lookup of -1 marks a value that was not in the vocabulary.
            oov_indices = tf.where(tf.equal(lookup_values, -1))
            oov_inputs = tf.compat.v1.gather_nd(input_values, oov_indices)
            msg = tf.strings.format(
                "When `num_oov_indices=0` all inputs should be in vocabulary, "
                "found OOV values {}, consider setting `num_oov_indices=1`.",
                (oov_inputs, ))
            assertion = tf.Assert(tf.equal(tf.compat.v1.size(oov_indices), 0),
                                  [msg])
            lookup_checks.append(assertion)

        # Build the outputs under the assertion so it runs with them.
        with tf.control_dependencies(lookup_checks):
            if self.output_mode == INT:
                return tf.identity(lookup_result)

            multi_hot_output = (self.output_mode == MULTI_HOT)
            if self._vocab_size and not self.pad_to_max_tokens:
                out_depth = self._vocab_size
            else:
                out_depth = self.max_tokens
            if self.sparse:
                bincounts = category_encoding.sparse_bincount(
                    lookup_result, out_depth, multi_hot_output)
            else:
                bincounts = category_encoding.dense_bincount(
                    lookup_result, out_depth, multi_hot_output)

            if self.output_mode == TF_IDF:
                return tf.multiply(bincounts, self.tf_idf_weights)

            return bincounts
Пример #5
0
    def call(self, inputs):
        """Bucketize `inputs` against `self.bin_boundaries`.

        Ragged and sparse inputs have only their values bucketized; dense
        inputs are bucketized directly.

        Returns:
            The output of `tf.raw_ops.Bucketize`, in the same dense, sparse
            or ragged container as the input.
        """
        def to_bucket_ids(values):
            return tf.raw_ops.Bucketize(input=values,
                                        boundaries=self.bin_boundaries)

        if tf_utils.is_ragged(inputs):
            # Ragged map_flat_values doesn't touch the non-values tensors in
            # the ragged composite tensor. If this op is the only op in a
            # Keras model, this can cause errors in Graph mode, so wrap the
            # tensor in an identity.
            return tf.identity(
                tf.ragged.map_flat_values(to_bucket_ids, inputs))

        if tf_utils.is_sparse(inputs):
            sparse_indices = tf.identity(inputs.indices)
            bucket_values = to_bucket_ids(inputs.values)
            sparse_shape = tf.identity(inputs.dense_shape)
            return tf.SparseTensor(indices=sparse_indices,
                                   values=bucket_values,
                                   dense_shape=sparse_shape)

        return to_bucket_ids(inputs)
Пример #6
0
    def call(self, inputs):
        """Look up `inputs` in the table and encode them per `output_mode`.

        Lists, tuples and numpy arrays are converted to tensors first. When
        `num_oov_indices` is 0 and the layer is not inverted, an assertion is
        attached to the output that fails if any lookup produced -1,
        reporting the offending input values.

        Returns:
            The lookup result for `INT` output mode, otherwise
            `self._encode_output(lookup_result)`.

        Raises:
            ValueError: If neither `max_tokens` nor a vocabulary size is set.
        """
        if isinstance(inputs, (list, tuple, np.ndarray)):
            inputs = tf.convert_to_tensor(inputs)

        if not (self.max_tokens or self._vocab_size is not None):
            raise ValueError(
                "You must set the layer's vocabulary before calling it. "
                "Either pass a `vocabulary` argument to the layer, or "
                "call `layer.adapt(dataset)` with some sample data.")
        self._called = True

        widen_keys = self._key_dtype == tf.int64 and inputs.dtype == tf.int32
        if widen_keys:
            inputs = tf.cast(inputs, tf.int64)
        lookup_result = self._table_handler.lookup(inputs)

        lookup_checks = []

        must_be_in_vocab = self.num_oov_indices == 0 and not self.invert
        if must_be_in_vocab:
            # Compare flat values so sparse and ragged inputs are checked the
            # same way as dense ones.
            if tf_utils.is_sparse(inputs):
                found, queried = lookup_result.values, inputs.values
            elif tf_utils.is_ragged(inputs):
                found, queried = lookup_result.flat_values, inputs.flat_values
            else:
                found, queried = lookup_result, inputs
            # tf.where needs rank > 0.
            if queried.shape.rank == 0:
                queried = self._expand_dims(queried, -1)
                found = self._expand_dims(found, -1)
            # A lookup of -1 marks a value that was not in the vocabulary.
            oov_indices = tf.where(tf.equal(found, -1))
            oov_inputs = tf.compat.v1.gather_nd(queried, oov_indices)
            msg = tf.strings.format(
                "When `num_oov_indices=0` all inputs should be in vocabulary, "
                "found OOV values {}, consider setting `num_oov_indices=1`.",
                (oov_inputs, ))
            nothing_oov = tf.equal(tf.compat.v1.size(oov_indices), 0)
            lookup_checks.append(tf.Assert(nothing_oov, [msg]))

        # Build the outputs under the assertion so it runs with them.
        with tf.control_dependencies(lookup_checks):
            if self.output_mode == INT:
                return tf.identity(lookup_result)
            return self._encode_output(lookup_result)
Пример #7
0
    def call(self, inputs):
        """Bucketize `inputs` against `self.bin_boundaries` as int64.

        Ragged and sparse inputs have only their values bucketized; dense
        inputs are bucketized directly.

        Returns:
            int64 bucket indices in the same dense, sparse or ragged
            container as the input.
        """
        def to_bucket_ids(values):
            raw_ids = tf.raw_ops.Bucketize(input=values,
                                           boundaries=self.bin_boundaries)
            # All other preprocessing layers use int64 for int output, so we
            # conform here. Sadly the underlying op only supports int32, so
            # we need to cast.
            return tf.cast(raw_ids, tf.int64)

        if tf_utils.is_ragged(inputs):
            # Ragged map_flat_values doesn't touch the non-values tensors in
            # the ragged composite tensor. If this op is the only op in a
            # Keras model, this can cause errors in Graph mode, so wrap the
            # tensor in an identity.
            return tf.identity(
                tf.ragged.map_flat_values(to_bucket_ids, inputs))

        if tf_utils.is_sparse(inputs):
            sparse_indices = tf.identity(inputs.indices)
            bucket_values = to_bucket_ids(inputs.values)
            sparse_shape = tf.identity(inputs.dense_shape)
            return tf.SparseTensor(indices=sparse_indices,
                                   values=bucket_values,
                                   dense_shape=sparse_shape)

        return to_bucket_ids(inputs)
Пример #8
0
    def call(self, inputs):
        """Bucketize `inputs` and encode the bucket indices per `output_mode`.

        Ragged and sparse inputs have only their values bucketized. The
        encoding depth is one more than the number of bin boundaries.

        Returns:
            The result of `utils.encode_categorical_inputs` on the bucket
            indices.
        """
        def to_bucket_ids(values):
            return tf.raw_ops.Bucketize(input=values,
                                        boundaries=self.bin_boundaries)

        if tf_utils.is_ragged(inputs):
            indices = tf.ragged.map_flat_values(to_bucket_ids, inputs)
        elif tf_utils.is_sparse(inputs):
            sparse_indices = tf.identity(inputs.indices)
            bucket_values = to_bucket_ids(inputs.values)
            sparse_shape = tf.identity(inputs.dense_shape)
            indices = tf.SparseTensor(indices=sparse_indices,
                                      values=bucket_values,
                                      dense_shape=sparse_shape)
        else:
            indices = to_bucket_ids(inputs)

        # N boundaries split the number line into N + 1 buckets.
        depth = len(self.bin_boundaries) + 1
        return utils.encode_categorical_inputs(
            indices,
            output_mode=self.output_mode,
            depth=depth,
            sparse=self.sparse,
            dtype=self.compute_dtype)
Пример #9
0
 def expand_dims(inputs, axis):
     """Expand `inputs` at `axis`, using the sparse op for sparse inputs.

     Sparse inputs go through `tf.sparse.expand_dims`; everything else is
     handed to `tf.compat.v1.expand_dims`.
     """
     if not tf_utils.is_sparse(inputs):
         return tf.compat.v1.expand_dims(inputs, axis)
     return tf.sparse.expand_dims(inputs, axis)
Пример #10
0
 def test_is_sparse_return_false_for_list(self):
   """`tf_utils.is_sparse` should be false for a plain Python list."""
   plain_list = [1., 2., 3.]
   reported_sparse = tf_utils.is_sparse(plain_list)
   self.assertFalse(reported_sparse)
Пример #11
0
 def test_is_sparse_return_true_for_sparse_tensor_value(self):
   """`tf_utils.is_sparse` should be true for a `SparseTensorValue`."""
   sparse_value = tf.compat.v1.SparseTensorValue(
       indices=[[0, 0], [1, 2]],
       values=[1, 2],
       dense_shape=[3, 4])
   reported_sparse = tf_utils.is_sparse(sparse_value)
   self.assertTrue(reported_sparse)
Пример #12
0
 def _expand_dims(self, inputs, axis):
     """Expand `inputs` at `axis`, using the sparse op for sparse inputs.

     Sparse inputs go through `tf.sparse.expand_dims`; everything else is
     handed to `tf.expand_dims`.
     """
     if not tf_utils.is_sparse(inputs):
         return tf.expand_dims(inputs, axis)
     return tf.sparse.expand_dims(inputs, axis)
Пример #13
0
def expand_dims(inputs, axis):
    """Expand `inputs` at `axis` for sparse, ragged, or dense tensors.

    Sparse inputs go through `tf.sparse.expand_dims`; everything else is
    handed to `tf.expand_dims`.
    """
    expand_fn = (tf.sparse.expand_dims
                 if tf_utils.is_sparse(inputs) else tf.expand_dims)
    return expand_fn(inputs, axis)