Пример #1
0
    def call(self, inputs):
        """Look up `inputs` and encode the result according to `output_mode`.

        Args:
          inputs: Dense, ragged or sparse input; it is first passed through
            `self._standardize_inputs` with the layer's key dtype.

        Returns:
          For `INT` output mode, the raw lookup result (a scalar again if the
          input was a scalar). For every other mode, whatever
          `utils.encode_categorical_inputs` returns for the lookups.
        """
        self._maybe_freeze_vocab_size()

        inputs = self._standardize_inputs(inputs, self._key_dtype)
        input_shape = inputs.shape
        scalar_input = input_shape.rank == 0
        # Rank-0 input is not accepted by every op used below, so temporarily
        # give it a trailing axis.
        if scalar_input:
            inputs = self._expand_dims(inputs, -1)

        # Only the stored values are looked up; the sparse / ragged structure
        # is carried over unchanged.
        if tf_utils.is_sparse(inputs):
            looked_up_values = self._lookup_dense(inputs.values)
            lookups = tf.SparseTensor(inputs.indices, looked_up_values,
                                      inputs.dense_shape)
        elif tf_utils.is_ragged(inputs):
            lookups = tf.ragged.map_flat_values(self._lookup_dense, inputs)
        else:
            lookups = self._lookup_dense(inputs)

        if self.output_mode == INT:
            # Undo the temporary uprank for scalar input.
            return tf.squeeze(lookups, -1) if scalar_input else lookups

        if self.pad_to_max_tokens:
            depth = self.max_tokens
        else:
            depth = self._frozen_vocab_size

        # IDF weights are only handed to the encoder in TF-IDF mode.
        idf_weights = None
        if self.output_mode == TF_IDF:
            idf_weights = self.idf_weights_const

        return utils.encode_categorical_inputs(lookups,
                                               output_mode=self.output_mode,
                                               depth=depth,
                                               dtype=self.compute_dtype,
                                               sparse=self.sparse,
                                               idf_weights=idf_weights)
Пример #2
0
  def call(self, inputs):
    """Bucketize `inputs` against the boundaries stored in `self.bins`.

    Args:
      inputs: A ragged, sparse or dense tensor of values to bucketize. Values
        are cast to float32 before being handed to the op.

    Returns:
      The bucket indices produced by `BoostedTreesBucketize`, in the same
      container kind as the input: an identity-wrapped ragged tensor, a new
      `tf.SparseTensor`, or a dense tensor reshaped to the input's dynamic
      shape.

    Raises:
      NotImplementedError: If a dense input has a non-batch dimension whose
        static size is unknown.
    """
    bins = [tf.cast(tf.compat.v1.squeeze(self.bins), tf.float32)]

    def _bucketize_fn(inputs):
      return tf.raw_ops.BoostedTreesBucketize(
          float_values=[tf.cast(inputs, tf.float32)],
          bucket_boundaries=bins)[0]

    if tf_utils.is_ragged(inputs):
      integer_buckets = tf.ragged.map_flat_values(
          _bucketize_fn, inputs)
      # Ragged map_flat_values doesn't touch the non-values tensors in the
      # ragged composite tensor. If this op is the only op in a Keras model,
      # this can cause errors in Graph mode, so wrap the tensor in an identity.
      return tf.identity(integer_buckets)
    elif isinstance(inputs, tf.SparseTensor):
      return tf.SparseTensor(
          indices=tf.identity(inputs.indices),
          values=_bucketize_fn(inputs.values),
          dense_shape=tf.identity(inputs.dense_shape))
    else:
      static_shape = inputs.get_shape()
      if any(dim is None for dim in static_shape.as_list()[1:]):
        # The two literals are concatenated implicitly, so the separating
        # space has to live inside the first one.
        raise NotImplementedError(
            "Discretization Layer requires known non-batch shape, "
            "found {}".format(static_shape))

      dynamic_shape = tf.shape(inputs)
      # BoostedTreesBucketize only handles rank 1 inputs. We need to flatten our
      # inputs after batch size and vectorized_map over each sample.
      reshaped = tf.reshape(inputs, [dynamic_shape[0], -1])
      return tf.reshape(
          tf.vectorized_map(_bucketize_fn, reshaped),
          dynamic_shape)
Пример #3
0
    def update_state(self, data):
        """Fold one batch of adapt data into the layer's running token counts.

        Args:
          data: A batch of tokens; it is standardized with
            `self._standardize_inputs` and upranked to at least rank 2.

        Raises:
          ValueError: If the layer already has an input vocabulary.
        """
        if self._has_input_vocabulary:
            layer_name = self.__class__.__name__
            raise ValueError(
                "Cannot adapt {} layer after setting a static vocabulary via init "
                "argument or `set_vocabulary`.".format(layer_name))

        data = self._standardize_inputs(data, self.vocabulary_dtype)
        # A scalar is upranked twice and a 1-d tensor once, so both end up as
        # a single row; for tf-idf a 1-d tensor is a single document.
        if data.shape.rank == 0:
            data = tf.expand_dims(data, 0)
        if data.shape.rank == 1:
            data = tf.expand_dims(data, 0)

        # Add this batch's per-token counts to the counts already stored.
        tokens, counts = self._num_tokens(data)
        stored_counts = self.token_counts.lookup(tokens)
        self.token_counts.insert(tokens, counts + stored_counts)

        if self.output_mode != TF_IDF:
            return

        # Keep each token at most once per row, so the counts below are the
        # number of rows (documents) that contain the token.
        deduped_doc_data = tf.map_fn(lambda row: tf.unique(row)[0], data)
        tokens, doc_counts = self._num_tokens(deduped_doc_data)
        stored_doc_counts = self.token_document_counts.lookup(tokens)
        self.token_document_counts.insert(tokens,
                                          doc_counts + stored_doc_counts)

        # The number of rows in this batch is the number of new documents.
        if tf_utils.is_ragged(data):
            batch_documents = data.nrows()
        else:
            batch_documents = tf.shape(data, out_type=tf.int64)[0]
        self.num_documents.assign_add(batch_documents)
Пример #4
0
def convert_to_list(values, sparse_default_value=None):
    """Turn a tensor-like, composite tensor or ndarray into a Python list.

    Args:
      values: A ragged tensor (or ragged value), sparse tensor (or sparse
        value), dense tensor, ndarray, or anything else (returned as-is).
      sparse_default_value: Fill value used when densifying sparse input. If
        None, `''` is used for string values and `-1` otherwise.

    Returns:
      A Python list for the tensor / ndarray inputs above; any other input is
      returned unchanged.
    """
    if tf_utils.is_ragged(values):
        # A symbolic RaggedTensor outside eager mode has to be evaluated
        # before `to_list()` can be called on it, and backend.get_value()
        # will not evaluate composite tensors, so run it in the session.
        # TODO(momernick): Get Keras to recognize composite tensors as Tensors
        # and then replace this with a call to backend.get_value.
        symbolic_ragged = (isinstance(values, tf.RaggedTensor)
                           and not tf.executing_eagerly())
        if symbolic_ragged:
            values = backend.get_session(values).run(values)
        # `to_list()` already yields a plain list, so nothing below applies.
        return values.to_list()

    sparse_types = (tf.SparseTensor, tf.compat.v1.SparseTensorValue)
    if isinstance(values, sparse_types):
        if sparse_default_value is None:
            holds_strings = tf.as_dtype(values.values.dtype) == tf.string
            sparse_default_value = '' if holds_strings else -1
        densified = tf.sparse.to_dense(values,
                                       default_value=sparse_default_value)
        values = backend.get_value(densified)

    if isinstance(values, tf.Tensor):
        values = backend.get_value(values)

    # Whether the ndarray was passed in or produced above, hand back a
    # standard Python list.
    if isinstance(values, np.ndarray):
        values = values.tolist()

    return values
Пример #5
0
    def call(self, inputs):
        """Preprocess `inputs`, look them up, and pad/trim integer output.

        Args:
          inputs: A list, tuple, ndarray or tensor. Python and NumPy
            containers are converted to a tensor first.

        Returns:
          * The preprocessed inputs if `self._output_mode` is None.
          * The lookup layer's output unchanged for non-`INT` output modes.
          * For `INT` mode, the lookups trimmed to
            `self._output_sequence_length` along the last axis. Ragged
            lookups are converted to dense with 0 as the fill value, and
            dense lookups are padded up to the requested length when one is
            set.
        """
        if isinstance(inputs, (list, tuple, np.ndarray)):
            inputs = tf.convert_to_tensor(inputs)

        inputs = self._preprocess(inputs)

        # If we're not doing any output processing, return right away.
        if self._output_mode is None:
            return inputs

        lookup_data = self._index_lookup_layer(inputs)
        if self._output_mode != INT:
            return lookup_data

        # Maybe trim the output (NOOP if self._output_sequence_length is None).
        output_tensor = lookup_data[..., :self._output_sequence_length]

        # If it is a ragged tensor, convert it to dense with correct shape.
        # A None last dimension is left for `to_tensor` to resolve.
        if tf_utils.is_ragged(output_tensor):
            output_shape = output_tensor.shape.as_list()
            output_shape[-1] = self._output_sequence_length
            return output_tensor.to_tensor(default_value=0,
                                           shape=output_shape)

        if self._output_sequence_length is None:
            return output_tensor

        # Use the dynamic shape here: required_space_to_batch_paddings needs
        # concrete sizes, and the static shape may contain unknown (None)
        # dimensions such as the batch size.
        dynamic_shape = tf.shape(output_tensor)
        padded_shape = tf.concat(
            (dynamic_shape[:-1], [self._output_sequence_length]), 0)
        padding, _ = tf.required_space_to_batch_paddings(
            dynamic_shape, padded_shape)
        return tf.compat.v1.pad(output_tensor, padding)
Пример #6
0
 def _num_tokens(self, data):
     """Return the distinct tokens of `data` and how often each one occurs.

     Args:
       data: A sparse, ragged or dense tensor of tokens.

     Returns:
       A `(tokens, counts)` pair taken from `tf.unique_with_counts` over the
       flattened values; the op is asked for int64 indices.
     """
     # Reduce every supported container to a flat tensor of its values.
     if tf_utils.is_sparse(data):
         flattened = data.values
     elif tf_utils.is_ragged(data):
         flattened = data.flat_values
     else:
         flattened = tf.reshape(data, [-1])

     # The op yields (unique values, index map, counts); the index map is
     # not needed here.
     unique_result = tf.unique_with_counts(flattened, out_idx=tf.int64)
     return unique_result[0], unique_result[2]
Пример #7
0
    def call(self, inputs):
        """Look up `inputs` and return indices or a bincount encoding.

        Args:
          inputs: Dense, ragged or sparse input; it is first passed through
            `self._standardize_inputs` with the layer's key dtype.

        Returns:
          For `INT` output mode, the raw lookup result (a scalar again if the
          input was a scalar). Otherwise a sparse or dense bincount of the
          lookups, multiplied by `self.idf_weights_const` in `TF_IDF` mode.

        Raises:
          ValueError: If a non-`INT` mode would have to encode lookups of
            rank greater than 2.
        """
        self._maybe_freeze_vocab_size()

        inputs = self._standardize_inputs(inputs, self._key_dtype)
        input_shape = inputs.shape
        scalar_input = input_shape.rank == 0
        # Rank-0 input is not accepted by every op used below, so temporarily
        # give it a trailing axis.
        if scalar_input:
            inputs = self._expand_dims(inputs, -1)

        # Only the stored values are looked up; the sparse / ragged structure
        # is carried over unchanged.
        if tf_utils.is_sparse(inputs):
            looked_up_values = self._lookup_dense(inputs.values)
            lookups = tf.SparseTensor(inputs.indices, looked_up_values,
                                      inputs.dense_shape)
        elif tf_utils.is_ragged(inputs):
            lookups = tf.ragged.map_flat_values(self._lookup_dense, inputs)
        else:
            lookups = self._lookup_dense(inputs)

        if self.output_mode == INT:
            # Undo the temporary uprank for scalar input.
            return tf.squeeze(lookups, -1) if scalar_input else lookups

        # One-hot adds a trailing axis unless the last dimension is already 1.
        if self.output_mode == ONE_HOT and lookups.shape[-1] != 1:
            lookups = self._expand_dims(lookups, -1)

        # TODO(b/190445202): remove output rank restriction.
        lookup_rank = lookups.shape.rank
        if lookup_rank > 2:
            raise ValueError(
                "Received input shape {}, which would result in output rank {}. "
                "Currently only outputs up to rank 2 are supported for "
                "`output_mode={}`.".format(input_shape, lookup_rank,
                                           self.output_mode))

        binary_output = self.output_mode in (MULTI_HOT, ONE_HOT)
        out_depth = (self.max_tokens
                     if self.pad_to_max_tokens else self._frozen_vocab_size)
        bincount_fn = (category_encoding.sparse_bincount
                       if self.sparse else category_encoding.dense_bincount)
        bincounts = bincount_fn(lookups, out_depth, binary_output)

        if self.output_mode == TF_IDF:
            return tf.multiply(bincounts, self.idf_weights_const)
        return bincounts
Пример #8
0
    def call(self, inputs):
        """Look up `inputs` in the table and return indices or bincounts.

        Args:
          inputs: A dense, ragged or sparse tensor of keys. int32 input is
            cast to int64 when the layer's key dtype is int64.

        Returns:
          For `INT` output mode, an identity of the lookup result. Otherwise
          a sparse or dense bincount of the lookups, multiplied by
          `self.tf_idf_weights` in `TF_IDF` mode. When `num_oov_indices` is 0
          and the layer is not inverted, the result is produced under a
          control dependency on an assertion that no lookup returned -1.

        Raises:
          ValueError: If neither `max_tokens` nor a vocabulary size is set.
        """
        vocabulary_unset = not self.max_tokens and self._vocab_size is None
        if vocabulary_unset:
            raise ValueError(
                "You must set the layer's vocabulary before calling it. "
                "Either pass a `vocabulary` argument to the layer, or "
                "call `layer.adapt(dataset)` with some sample data.")
        self._called = True
        if self._key_dtype == tf.int64 and inputs.dtype == tf.int32:
            inputs = tf.cast(inputs, tf.int64)
        lookup_result = self._table_handler.lookup(inputs)

        lookup_checks = []

        if self.num_oov_indices == 0 and not self.invert:
            # Compare flat values so the check is independent of the
            # container type.
            if tf_utils.is_sparse(inputs):
                lookup_values, input_values = (lookup_result.values,
                                               inputs.values)
            elif tf_utils.is_ragged(inputs):
                lookup_values, input_values = (lookup_result.flat_values,
                                               inputs.flat_values)
            else:
                lookup_values, input_values = lookup_result, inputs

            # Positions whose lookup came back as -1, and the inputs there.
            oov_indices = tf.where(tf.equal(lookup_values, -1))
            oov_inputs = tf.compat.v1.gather_nd(input_values, oov_indices)
            msg = tf.strings.format(
                "When `num_oov_indices=0` all inputs should be in vocabulary, "
                "found OOV values {}, consider setting `num_oov_indices=1`.",
                (oov_inputs, ))
            no_oov_found = tf.equal(tf.compat.v1.size(oov_indices), 0)
            lookup_checks.append(tf.Assert(no_oov_found, [msg]))

        with tf.control_dependencies(lookup_checks):
            if self.output_mode == INT:
                return tf.identity(lookup_result)

            multi_hot_output = self.output_mode == MULTI_HOT
            use_vocab_size = self._vocab_size and not self.pad_to_max_tokens
            out_depth = self._vocab_size if use_vocab_size else self.max_tokens
            bincount_fn = (category_encoding.sparse_bincount
                           if self.sparse else category_encoding.dense_bincount)
            bincounts = bincount_fn(lookup_result, out_depth,
                                    multi_hot_output)

            if self.output_mode == TF_IDF:
                return tf.multiply(bincounts, self.tf_idf_weights)
            return bincounts
Пример #9
0
    def _preprocess(self, inputs):
        """Standardize, split and n-gram `inputs` according to the layer config.

        Args:
          inputs: A dense or ragged string tensor.

        Returns:
          The processed tensor. As noted in the body, it can be ragged or
          dense depending on the layer's configuration.

        Raises:
          ValueError: If `self._standardize` or `self._split` is neither None,
            the supported string option, nor a callable.
        """
        if self._standardize == LOWER_AND_STRIP_PUNCTUATION:
            if tf_utils.is_ragged(inputs):
                lowercase_inputs = tf.ragged.map_flat_values(
                    tf.strings.lower, inputs)
                # Depending on configuration, we may never touch the non-data tensor
                # in the ragged inputs tensor. If that is the case, and this is the
                # only layer in the keras model, running it will throw an error.
                # To get around this, we wrap the result in an identity.
                lowercase_inputs = tf.identity(lowercase_inputs)
            else:
                lowercase_inputs = tf.strings.lower(inputs)
            inputs = tf.strings.regex_replace(lowercase_inputs,
                                              DEFAULT_STRIP_REGEX, "")
        elif callable(self._standardize):
            inputs = self._standardize(inputs)
        elif self._standardize is not None:
            # The value is wrapped in a 1-tuple so that a tuple-valued option
            # is reported instead of breaking the `%` formatting.
            raise ValueError(
                ("%s is not a supported standardization. "
                 "TextVectorization supports the following options "
                 "for `standardize`: None, "
                 "'lower_and_strip_punctuation', or a "
                 "Callable.") % (self._standardize, ))

        if self._split is not None:
            # If we are splitting, the trailing axis of a rank > 1 input is
            # squeezed out (presumably it must have size 1 for the squeeze to
            # succeed - TODO confirm). We do this here instead of after
            # splitting for performance reasons - it's more expensive to
            # squeeze a ragged tensor.
            if inputs.shape.ndims > 1:
                inputs = tf.compat.v1.squeeze(inputs, axis=-1)
            if self._split == SPLIT_ON_WHITESPACE:
                # This treats multiple whitespaces as one whitespace, and strips leading
                # and trailing whitespace.
                inputs = tf.strings.split(inputs)
            elif callable(self._split):
                inputs = self._split(inputs)
            else:
                # The literals are concatenated implicitly, so the space after
                # the first sentence has to be written explicitly.
                raise ValueError(
                    ("%s is not a supported splitting. "
                     "TextVectorization supports the following options "
                     "for `split`: None, 'whitespace', or a Callable.") %
                    (self._split, ))

        # Note that 'inputs' here can be either ragged or dense depending on the
        # configuration choices for this Layer. The strings.ngrams op, however, does
        # support both ragged and dense inputs.
        if self._ngrams is not None:
            inputs = tf.strings.ngrams(inputs,
                                       ngram_width=self._ngrams,
                                       separator=" ")

        return inputs
Пример #10
0
    def lookup(self, inputs):
        """Dispatch a table lookup to the handler matching the input kind.

        Args:
          inputs: A sparse tensor or sparse value, or anything that
            `convert_to_tensor_or_ragged_tensor` accepts.

        Returns:
          The result of the sparse, ragged or dense lookup helper.
        """
        # Sparse input has to be routed before any tensor conversion is
        # attempted, since it does not go through that conversion cleanly.
        sparse_types = (tf.SparseTensor, tf.compat.v1.SparseTensorValue)
        if isinstance(inputs, sparse_types):
            return self._sparse_lookup(inputs)

        # Lists and arrays become dense or ragged tensors here.
        converted = ragged_tensor.convert_to_tensor_or_ragged_tensor(inputs)

        if tf_utils.is_ragged(converted):
            lookup_fn = self._ragged_lookup
        else:
            lookup_fn = self._tensor_lookup
        return lookup_fn(converted)
Пример #11
0
    def compute(self, values, accumulator=None):
      """Compute a step in this computation, returning a new accumulator.

      Args:
        values: The batch of values to summarize. For sparse or ragged input
          only the stored (flat) values are used.
        accumulator: The accumulator from earlier steps, or None on the first
          step.

      Returns:
        A new accumulator built from this batch's summaries, merged with the
        summaries of `accumulator` when one is given.
      """
      if isinstance(values, tf.SparseTensor):
        values = values.values
      if tf_utils.is_ragged(values):
        values = values.flat_values
      # The target shape is passed positionally: the `newshape` keyword is
      # deprecated in NumPy 2.1+, while the positional form works on every
      # NumPy version.
      flattened_input = np.reshape(values, (-1, 1))

      # After the transpose the single column is the only row, so this yields
      # one summary covering all values.
      summaries = [summarize(v, self.epsilon) for v in flattened_input.T]

      if accumulator is None:
        return self._create_accumulator(summaries)

      merged_summaries = [
          merge_summaries(prev_summ, summ, self.epsilon)
          for prev_summ, summ in zip(accumulator.summaries, summaries)
      ]
      return self._create_accumulator(merged_summaries)
Пример #12
0
    def call(self, inputs):
        """Replace every value in `inputs` by its bucket index.

        Args:
          inputs: A ragged, sparse or dense tensor of values.

        Returns:
          The output of `tf.raw_ops.Bucketize` with `self.bin_boundaries`,
          in the same container kind (ragged, sparse or dense) as the input.
        """
        def bucketize(values):
            return tf.raw_ops.Bucketize(input=values,
                                        boundaries=self.bin_boundaries)

        if tf_utils.is_ragged(inputs):
            # map_flat_values leaves the non-values tensors of the ragged
            # composite untouched. If this op is the only op in a Keras
            # model, that can cause errors in Graph mode, hence the identity.
            ragged_buckets = tf.ragged.map_flat_values(bucketize, inputs)
            return tf.identity(ragged_buckets)

        if not tf_utils.is_sparse(inputs):
            return bucketize(inputs)

        # Sparse: bucketize only the stored values and copy the structure.
        sparse_indices = tf.identity(inputs.indices)
        bucketed_values = bucketize(inputs.values)
        sparse_shape = tf.identity(inputs.dense_shape)
        return tf.SparseTensor(indices=sparse_indices,
                               values=bucketed_values,
                               dense_shape=sparse_shape)
Пример #13
0
    def call(self, inputs):
        """Look up `inputs` in the table and return indices or an encoding.

        Args:
          inputs: A list, tuple, ndarray, or dense / ragged / sparse tensor of
            keys. Python and NumPy containers are converted to a tensor, and
            int32 input is cast to int64 when the key dtype is int64.

        Returns:
          For `INT` output mode, an identity of the lookup result; otherwise
          the result of `self._encode_output`. When `num_oov_indices` is 0 and
          the layer is not inverted, the result is produced under a control
          dependency on an assertion that no lookup returned -1.

        Raises:
          ValueError: If neither `max_tokens` nor a vocabulary size is set.
        """
        if isinstance(inputs, (list, tuple, np.ndarray)):
            inputs = tf.convert_to_tensor(inputs)

        vocabulary_unset = not self.max_tokens and self._vocab_size is None
        if vocabulary_unset:
            raise ValueError(
                "You must set the layer's vocabulary before calling it. "
                "Either pass a `vocabulary` argument to the layer, or "
                "call `layer.adapt(dataset)` with some sample data.")
        self._called = True
        if self._key_dtype == tf.int64 and inputs.dtype == tf.int32:
            inputs = tf.cast(inputs, tf.int64)
        lookup_result = self._table_handler.lookup(inputs)

        lookup_checks = []

        if self.num_oov_indices == 0 and not self.invert:
            # Compare flat values so the check is independent of the
            # container type.
            if tf_utils.is_sparse(inputs):
                lookup_values, input_values = (lookup_result.values,
                                               inputs.values)
            elif tf_utils.is_ragged(inputs):
                lookup_values, input_values = (lookup_result.flat_values,
                                               inputs.flat_values)
            else:
                lookup_values, input_values = lookup_result, inputs

            # tf.where needs rank > 0, so scalars get a trailing axis.
            if input_values.shape.rank == 0:
                input_values = self._expand_dims(input_values, -1)
                lookup_values = self._expand_dims(lookup_values, -1)

            # Positions whose lookup came back as -1, and the inputs there.
            oov_indices = tf.where(tf.equal(lookup_values, -1))
            oov_inputs = tf.compat.v1.gather_nd(input_values, oov_indices)
            msg = tf.strings.format(
                "When `num_oov_indices=0` all inputs should be in vocabulary, "
                "found OOV values {}, consider setting `num_oov_indices=1`.",
                (oov_inputs, ))
            no_oov_found = tf.equal(tf.compat.v1.size(oov_indices), 0)
            lookup_checks.append(tf.Assert(no_oov_found, [msg]))

        with tf.control_dependencies(lookup_checks):
            if self.output_mode != INT:
                return self._encode_output(lookup_result)
            return tf.identity(lookup_result)
Пример #14
0
    def call(self, inputs):
        """Preprocess `inputs`, look them up, and pad/trim integer output.

        Args:
          inputs: A list, tuple, ndarray or tensor. Python and NumPy
            containers are converted to a tensor first.

        Returns:
          * The preprocessed inputs if `self._output_mode` is None.
          * The lookup layer's output unchanged for non-`INT` modes, or when
            `self._ragged` is set.
          * Otherwise a dense tensor: ragged lookups are densified with 0 as
            the fill value, and dense lookups are trimmed / padded along the
            last axis to `self._output_sequence_length` when one is set.
        """
        if isinstance(inputs, (list, tuple, np.ndarray)):
            inputs = tf.convert_to_tensor(inputs)

        inputs = self._preprocess(inputs)

        # With no output mode there is nothing further to do.
        if self._output_mode is None:
            return inputs

        lookup_data = self._lookup_layer(inputs)

        # Non-int output comes straight from the underlying layer, and ragged
        # int output is returned without any padding.
        if self._output_mode != INT or self._ragged:
            return lookup_data

        target_length = self._output_sequence_length

        # A ragged tensor can be padded while it is converted to dense. With
        # a None target length, to_tensor pads the last dimension to the
        # bounding shape of the ragged dimension.
        if tf_utils.is_ragged(lookup_data):
            dense_shape = lookup_data.shape.as_list()
            dense_shape[-1] = target_length
            return lookup_data.to_tensor(default_value=0, shape=dense_shape)

        if target_length is None:
            return lookup_data

        # Dense tensor: trim first, then pad whatever is still missing.
        trimmed = lookup_data[..., :target_length]

        # The dynamic shape is used on purpose, because
        # required_space_to_batch_paddings requires a fully known shape.
        current_shape = tf.shape(trimmed)
        padded_shape = tf.concat((current_shape[:-1], [target_length]), 0)
        padding, _ = tf.required_space_to_batch_paddings(
            current_shape, padded_shape)
        return tf.pad(trimmed, padding)
Пример #15
0
    def lookup(self, inputs):
        """Dispatch a table lookup to the handler matching the input kind.

        Args:
          inputs: A sparse tensor or sparse value, a ragged tensor or ragged
            value, or anything `tf.convert_to_tensor` accepts.

        Returns:
          The result of the sparse, ragged or dense lookup helper.
        """
        # Sparse input has to be routed before any tensor conversion is
        # attempted, since it does not go through that conversion cleanly.
        sparse_types = (tf.SparseTensor, tf.compat.v1.SparseTensorValue)
        if isinstance(inputs, sparse_types):
            return self._sparse_lookup(inputs)

        # Everything that is not ragged is treated as a normal tensor.
        if not tf_utils.is_ragged(inputs):
            return self._tensor_lookup(tf.convert_to_tensor(inputs))

        # A RaggedTensorValue is rebuilt as a RaggedTensor from its flat
        # values and nested row splits before the lookup.
        if isinstance(inputs, tf.compat.v1.ragged.RaggedTensorValue):
            flat_values = tf.convert_to_tensor(value=inputs.flat_values,
                                               name="flat_values")
            inputs = tf.RaggedTensor.from_nested_row_splits(
                flat_values, inputs.nested_row_splits, validate=False)
        return self._ragged_lookup(inputs)
Пример #16
0
    def call(self, inputs):
        """Preprocess and index `inputs`, then pad/trim or vectorize them.

        Args:
          inputs: A list, tuple, ndarray or tensor. Python and NumPy
            containers are converted to a tensor first.

        Returns:
          * The preprocessed inputs if `self._output_mode` is None.
          * For non-`INT` modes, the output of `self._vectorize_layer`.
          * For `INT` mode, the indexed data as a dense tensor, padded or
            sliced along axis 1 to `self._output_sequence_length` when one is
            set.
        """
        if isinstance(inputs, (list, tuple, np.ndarray)):
            inputs = tf.convert_to_tensor(inputs)

        self._called = True
        inputs = self._preprocess(inputs)

        # With no output mode there is nothing further to do.
        if self._output_mode is None:
            return inputs
        indexed_data = self._index_lookup_layer(inputs)

        # Anything other than integer output is produced by the
        # vectorization layer.
        if self._output_mode != INT:
            return self._vectorize_layer(indexed_data)

        # Ragged data has to be converted to dense first.
        if tf_utils.is_ragged(indexed_data):
            dense_data = indexed_data.to_tensor(default_value=0)
        else:
            dense_data = indexed_data

        target_length = self._output_sequence_length
        if target_length is None:
            return dense_data

        # The current length is only known at run time, so padding versus
        # slicing is chosen dynamically with a cond.
        sequence_len = K.shape(dense_data)[1]
        pad_amt = target_length - sequence_len

        def _pad_to_length():
            return tf.compat.v1.pad(dense_data, [[0, 0], [0, pad_amt]])

        def _slice_to_length():
            return dense_data[:, :target_length]

        output_tensor = tf.compat.v1.cond(sequence_len < target_length,
                                          true_fn=_pad_to_length,
                                          false_fn=_slice_to_length)

        # Both branches end with the requested length, so record it in the
        # static shape.
        static_shape = output_tensor.shape.as_list()
        static_shape[-1] = target_length
        output_tensor.set_shape(tf.TensorShape(static_shape))
        return output_tensor
Пример #17
0
    def call(self, inputs):
        """Replace every value in `inputs` by its bucket index as int64.

        Args:
          inputs: A ragged, sparse or dense tensor of values.

        Returns:
          The output of `tf.raw_ops.Bucketize` with `self.bin_boundaries`,
          cast to int64, in the same container kind (ragged, sparse or dense)
          as the input.
        """
        def bucketize(values):
            bucket_ids = tf.raw_ops.Bucketize(input=values,
                                              boundaries=self.bin_boundaries)
            # The other preprocessing layers emit int64 for integer output,
            # while the op itself only produces int32, hence the cast.
            return tf.cast(bucket_ids, tf.int64)

        if tf_utils.is_ragged(inputs):
            # map_flat_values leaves the non-values tensors of the ragged
            # composite untouched. If this op is the only op in a Keras
            # model, that can cause errors in Graph mode, hence the identity.
            ragged_buckets = tf.ragged.map_flat_values(bucketize, inputs)
            return tf.identity(ragged_buckets)

        if not tf_utils.is_sparse(inputs):
            return bucketize(inputs)

        # Sparse: bucketize only the stored values and copy the structure.
        sparse_indices = tf.identity(inputs.indices)
        bucketed_values = bucketize(inputs.values)
        sparse_shape = tf.identity(inputs.dense_shape)
        return tf.SparseTensor(indices=sparse_indices,
                               values=bucketed_values,
                               dense_shape=sparse_shape)
Пример #18
0
    def call(self, inputs):
        """Bucketize `inputs` and encode the bucket indices.

        Args:
          inputs: A ragged, sparse or dense tensor of values.

        Returns:
          The result of `utils.encode_categorical_inputs` applied to the
          bucket indices, with a depth of one more than the number of bin
          boundaries.
        """
        def bucketize(values):
            return tf.raw_ops.Bucketize(input=values,
                                        boundaries=self.bin_boundaries)

        # Only the stored values are bucketized; the ragged / sparse
        # structure is carried over.
        if tf_utils.is_ragged(inputs):
            indices = tf.ragged.map_flat_values(bucketize, inputs)
        elif tf_utils.is_sparse(inputs):
            sparse_indices = tf.identity(inputs.indices)
            bucketed_values = bucketize(inputs.values)
            sparse_shape = tf.identity(inputs.dense_shape)
            indices = tf.SparseTensor(indices=sparse_indices,
                                      values=bucketed_values,
                                      dense_shape=sparse_shape)
        else:
            indices = bucketize(inputs)

        num_buckets = len(self.bin_boundaries) + 1
        return utils.encode_categorical_inputs(indices,
                                               output_mode=self.output_mode,
                                               depth=num_buckets,
                                               sparse=self.sparse,
                                               dtype=self.compute_dtype)
Пример #19
0
    def call(self, inputs):
        """Cross every combination of the inputs and concatenate the results.

        Args:
          inputs: A sequence of input tensors; each one is first passed
            through `self._preprocess_input`.

        Returns:
          The partial crossings of every `depth`-sized combination of the
          inputs, concatenated along axis 1. The concatenation is sparse when
          no input is ragged and at least one input is a `tf.SparseTensor`.

        Raises:
          ValueError: If a requested depth exceeds the number of inputs.
        """
        inputs = [self._preprocess_input(inp) for inp in inputs]
        num_inputs = len(inputs)
        # Without an explicit depth, only the full crossing is computed.
        depth_tuple = self._depth_tuple if self.depth else (num_inputs, )

        # Ragged takes precedence: sparse output is only considered when no
        # input is ragged.
        ragged_out = any(tf_utils.is_ragged(inp) for inp in inputs)
        sparse_out = (not ragged_out) and any(
            isinstance(inp, tf.SparseTensor) for inp in inputs)

        outputs = []
        for depth in depth_tuple:
            if num_inputs < depth:
                raise ValueError(
                    f'Number of inputs cannot be less than depth. Received '
                    f'{num_inputs} input tensors, and depth {depth}.')
            outputs.extend(
                self.partial_crossing(partial_inps, ragged_out, sparse_out)
                for partial_inps in itertools.combinations(inputs, depth))

        if not sparse_out:
            return tf.concat(outputs, axis=1)
        return tf.sparse.concat(axis=1, sp_inputs=outputs)
Пример #20
0
    def call(self, inputs):
        """Bucketize `inputs` against the boundaries stored in `self.bins`.

        Args:
          inputs: A ragged, sparse or dense tensor of values to bucketize.
            Values are cast to float32 before being handed to the op.

        Returns:
          The bucket indices produced by `BoostedTreesBucketize`, in the same
          container kind as the input: an identity-wrapped ragged tensor, a
          new `tf.SparseTensor`, or a dense tensor reshaped back to the
          input's static non-batch shape.

        Raises:
          NotImplementedError: If a dense input has a non-batch dimension
            whose static size is unknown.
        """
        def _bucketize_op(bins):
            bins = [tf.cast(bins, tf.float32)]
            return lambda inputs: tf.raw_ops.BoostedTreesBucketize(  # pylint: disable=g-long-lambda
                float_values=[tf.cast(inputs, tf.float32)],
                bucket_boundaries=bins)[0]

        if tf_utils.is_ragged(inputs):
            integer_buckets = tf.ragged.map_flat_values(
                _bucketize_op(tf.compat.v1.squeeze(self.bins)), inputs)
            # Ragged map_flat_values doesn't touch the non-values tensors in the
            # ragged composite tensor. If this op is the only op in a Keras model,
            # this can cause errors in Graph mode, so wrap the tensor in an identity.
            return tf.identity(integer_buckets)
        elif isinstance(inputs, tf.SparseTensor):
            # Reuse the shared helper rather than spelling the op out again;
            # only the stored values are bucketized.
            bucketize = _bucketize_op(tf.compat.v1.squeeze(self.bins))
            integer_buckets = bucketize(inputs.values)
            return tf.SparseTensor(indices=tf.identity(inputs.indices),
                                   values=integer_buckets,
                                   dense_shape=tf.identity(inputs.dense_shape))
        else:
            input_shape = inputs.get_shape()
            if any(dim is None for dim in input_shape.as_list()[1:]):
                # The two literals are concatenated implicitly, so the
                # separating space has to live inside the first one.
                raise NotImplementedError(
                    "Discretization Layer requires known non-batch shape, "
                    "found {}".format(input_shape))

            # Flatten everything after the batch axis into one axis so the op
            # can be mapped over each sample.
            reshaped = tf.reshape(
                inputs,
                [-1,
                 tf.raw_ops.Prod(input=input_shape.as_list()[1:], axis=0)])

            return tf.reshape(
                tf.vectorized_map(
                    _bucketize_op(tf.compat.v1.squeeze(self.bins)), reshaped),
                tf.constant([-1] + input_shape.as_list()[1:]))
Пример #21
0
 def _process_single_input(self, inputs):
   """Hash one input tensor into `self.num_bins` buckets.

   Args:
     inputs: A dense, ragged or sparse tensor. Integer input is converted to
       strings before hashing.

   Returns:
     The hashed values in the same container kind (ragged, sparse or dense)
     as the input.
   """
   # The hash function is applied to strings, so integers are converted
   # first. For sparse input only the stored values are converted.
   if inputs.dtype.is_integer:
     if isinstance(inputs, tf.SparseTensor):
       string_values = tf.as_string(inputs.values)
       inputs = tf.SparseTensor(
           indices=inputs.indices,
           values=string_values,
           dense_shape=inputs.dense_shape)
     else:
       inputs = tf.as_string(inputs)

   hash_fn = self._get_string_to_hash_bucket_fn()

   if tf_utils.is_ragged(inputs):
     return tf.ragged.map_flat_values(
         hash_fn, inputs, num_buckets=self.num_bins, name='hash')

   if not isinstance(inputs, tf.SparseTensor):
     return hash_fn(inputs, self.num_bins, name='hash')

   # Sparse: hash only the stored values and keep the structure.
   hashed_values = hash_fn(inputs.values, self.num_bins, name='hash')
   return tf.SparseTensor(
       indices=inputs.indices,
       values=hashed_values,
       dense_shape=inputs.dense_shape)
Пример #22
0
def _create_keras_history_helper(tensors, processed_ops, created_layers):
    """Recursive worker behind `create_keras_history`.

    For every tensor in `tensors` that has no `_keras_history` yet, wraps the
    op that produced it in a `TensorFlowOpLayer` (unless that op was already
    processed), after first recursing into the op inputs that themselves use
    Keras history. Op inputs that do not use Keras history are captured as
    constants of the new layer.

    Args:
        tensors: A structure of Tensors for which to create Keras metadata.
        processed_ops: Set. TensorFlow operations that have already been
            wrapped in `TensorFlowOpLayer` instances. Updated in place.
        created_layers: List. The `TensorFlowOpLayer` instances created.
            Appended to in place.

    Returns:
        Tuple. First element is the updated set of TensorFlow Operations that
        have been wrapped in `TensorFlowOpLayer` instances. Second element is
        a list of the `TensorFlowOpLayer` instances created.

    Raises:
        ValueError: If eager execution is enabled outside functions, or if
            any tensor lacking Keras history is a sparse or ragged tensor.
    """
    if tf.compat.v1.executing_eagerly_outside_functions():
        raise ValueError(
            '`create_keras_history` should only be called if eager is disabled!'
        )
    # `base_layer` supplies `TensorFlowOpLayer`. It is imported here rather
    # than at module level because of circular dependencies.
    # TODO(omalleyt): Resolve circular dependency.
    from keras.engine import base_layer  # pylint: disable=g-import-not-at-top

    unsupported_sparse_ops = []
    unsupported_ragged = []
    for tensor in tf.nest.flatten(tensors):
        # Already tracked by Keras: nothing to do.
        if getattr(tensor, '_keras_history', None) is not None:
            continue
        if isinstance(tensor,
                      (tf.SparseTensor, tf.compat.v1.SparseTensorValue)):
            unsupported_sparse_ops.append(tensor.op)
            continue
        if tf_utils.is_ragged(tensor):
            # Ragged tensors don't have an op property.
            unsupported_ragged.append(tensor)
            continue

        producer = tensor.op  # The Op that created this Tensor.
        if producer in processed_ops:
            continue

        # Split the op inputs into Keras-tracked tensors (become layer inputs)
        # and everything else (become layer constants, keyed by position).
        constants = {}
        keras_inputs = []
        for position, op_input in enumerate(list(producer.inputs)):
            if uses_keras_history(op_input):
                keras_inputs.append(op_input)
                continue

            # Treat any value not originating from a `keras.Input` as
            # a constant. Variables cannot be supported.
            cross_replica_in_session = (
                tf.distribute.in_cross_replica_context() and
                not tf.compat.v1.executing_eagerly_outside_functions())
            in_xla_context = control_flow_util.GraphOrParentsInXlaContext(
                tf.compat.v1.get_default_graph())
            keep_symbolic = (cross_replica_in_session or in_xla_context or
                             _UNSAFE_GRAPH_OP_LAYER_CREATION)
            if keep_symbolic:
                # In Legacy Graph mode, evaluating here makes Session be
                # configured improperly. The downside of this is that saving
                # via `get_config` breaks, but SavedModel still works.
                constants[position] = op_input
            else:
                with tf.init_scope():
                    constants[position] = backend.function([], op_input)([])

        # Recursively set `_keras_history` on the Keras-tracked inputs before
        # wiring up the layer for this op.
        keras_inputs = unnest_if_single_tensor(keras_inputs)
        processed_ops, created_layers = _create_keras_history_helper(
            keras_inputs, processed_ops, created_layers)

        op_layer = base_layer.TensorFlowOpLayer(
            producer.node_def.SerializeToString(),
            constants=constants,
            name=producer.name)
        created_layers.append(op_layer)
        op_layer._set_connectivity_metadata(  # pylint: disable=protected-access
            args=(keras_inputs, ),
            kwargs={},
            outputs=producer.outputs)
        processed_ops.update([producer])

    if not (unsupported_sparse_ops or unsupported_ragged):
        return processed_ops, created_layers

    lambda_example = """
    weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights)
    output = tf.keras.layers.Lambda(weights_mult)(input)
    """
    message_template = (
        'Tensorflow ops that generate ragged or sparse tensor '
        'outputs are currently not supported by Keras automatic '
        'op wrapping. Please wrap these ops in a Lambda layer: '
        '\n\n```\n{example}\n```\n'
        'Sparse ops encountered: {sparse_ops}\n'
        'Ragged tensors encountered: {ragged_tensors}\n')
    raise ValueError(
        message_template.format(
            example=lambda_example,
            sparse_ops=str(unsupported_sparse_ops),
            ragged_tensors=str(unsupported_ragged)))
Пример #23
0
 def test_is_ragged_return_true_for_ragged_tensor(self):
     """Checks that `tf_utils.is_ragged` is true for a `tf.RaggedTensor`."""
     flat_values = [3, 1, 4, 1, 5, 9, 2, 6]
     splits = [0, 4, 4, 7, 8, 8]
     ragged = tf.RaggedTensor.from_row_splits(
         values=flat_values, row_splits=splits)
     self.assertTrue(tf_utils.is_ragged(ragged))
Пример #24
0
    def __call__(self,
                 y_true,
                 y_pred,
                 sample_weight=None,
                 regularization_losses=None):
        """Computes the overall loss and updates the loss metrics.

        Args:
            y_true: An arbitrary structure of Tensors representing the ground
                truth.
            y_pred: An arbitrary structure of Tensors representing a Model's
                outputs.
            sample_weight: An arbitrary structure of Tensors representing the
                per-sample loss weights. If one Tensor is passed, it is used
                for all losses. If multiple Tensors are passed, the structure
                should match `y_pred`.
            regularization_losses: Additional losses to be added to the total
                loss.

        Returns:
            The total loss: the `tf.add_n` sum of every per-output loss value
            plus the regularization loss (if any). When no loss value was
            produced at all, a zero tensor of shape `()`.
        """
        y_true = self._conform_to_outputs(y_pred, y_true)
        sample_weight = self._conform_to_outputs(y_pred, sample_weight)

        if not self._built:
            self.build(y_pred)

        flat_pred = tf.nest.flatten(y_pred)
        flat_true = tf.nest.flatten(y_true)
        flat_weights = tf.nest.flatten(sample_weight)

        gradient_terms = []  # Summed into the returned loss.
        metric_terms = []  # Summed into the value fed to the loss metric.
        batch_dim = None  # Taken from the first output that has a loss.
        for target, pred, weight, loss_obj, loss_weight, metric_obj in zip(
                flat_true, flat_pred, flat_weights, self._losses,
                self._loss_weights, self._per_output_metrics):
            # Ok to have no loss for an output.
            if target is None or loss_obj is None:
                continue

            target, pred, weight = match_dtype_and_rank(target, pred, weight)
            weight = apply_mask(pred, weight, get_mask(pred))
            loss_value = loss_obj(target, pred, sample_weight=weight)
            reduction = loss_obj.reduction

            loss_metric_value = loss_value
            if reduction == losses_utils.ReductionV2.SUM:
                # Correct for the `Mean` loss metrics counting each replica
                # as a batch.
                strategy = tf.distribute.get_strategy()
                loss_metric_value *= strategy.num_replicas_in_sync

            if batch_dim is None:
                batch_dim = (target.nrows() if tf_utils.is_ragged(target) else
                             tf.compat.v1.shape(target)[0])

            # The per-output metric is updated before `loss_weight` is
            # applied to the metric value.
            if metric_obj is not None:
                metric_obj.update_state(loss_metric_value,
                                        sample_weight=batch_dim)

            if loss_weight is not None:
                loss_value *= loss_weight
                loss_metric_value *= loss_weight

            if reduction in (losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
                             losses_utils.ReductionV2.AUTO):
                loss_value = losses_utils.scale_loss_for_distribution(
                    loss_value)

            gradient_terms.append(loss_value)
            metric_terms.append(loss_metric_value)

        if regularization_losses:
            regularization_losses = losses_utils.cast_losses_to_common_dtype(
                regularization_losses)
            reg_loss = tf.add_n(regularization_losses)
            # The metric sees the raw regularization loss; the returned loss
            # gets it passed through `scale_loss_for_distribution`.
            metric_terms.append(reg_loss)
            gradient_terms.append(
                losses_utils.scale_loss_for_distribution(reg_loss))

        if not gradient_terms:
            # Ok for a model to have no compiled loss.
            return tf.zeros(shape=())

        metric_terms = losses_utils.cast_losses_to_common_dtype(metric_terms)
        self._loss_metric.update_state(tf.add_n(metric_terms),
                                       sample_weight=batch_dim)

        gradient_terms = losses_utils.cast_losses_to_common_dtype(
            gradient_terms)
        return tf.add_n(gradient_terms)
Пример #25
0
 def test_is_ragged_return_false_for_list(self):
     """Checks that `tf_utils.is_ragged` is false for a plain Python list."""
     plain_list = [1., 2., 3.]
     self.assertFalse(tf_utils.is_ragged(plain_list))