Пример #1
0
  def call(self, inputs):
    """Look up `inputs` in the vocabulary table and encode per `output_mode`.

    Returns raw vocabulary indices for INT mode, otherwise a (sparse or
    dense) bincount of the indices, optionally scaled by TF-IDF weights.
    """
    if not self.max_tokens and not self._vocab_size:
      raise ValueError("You must set the layer's vocabulary before calling it. "
                       "Either pass a `vocabulary` argument to the layer, or "
                       "call `layer.adapt(dataset)` with some sample data.")
    self._called = True
    # Table keys may be int64 while callers pass int32; upcast to match.
    if self._key_dtype == dtypes.int64 and inputs.dtype == dtypes.int32:
      inputs = math_ops.cast(inputs, dtypes.int64)
    indices = self._table_handler.lookup(inputs)

    if self.output_mode == INT:
      return indices

    # Prefer the actual vocabulary size unless padding out to max_tokens.
    if self._vocab_size and not self.pad_to_max_tokens:
      depth = self._vocab_size
    else:
      depth = self.max_tokens
    as_binary = self.output_mode == BINARY
    if self.sparse:
      counts = category_encoding.sparse_bincount(indices, depth, as_binary)
    else:
      counts = category_encoding.dense_bincount(indices, depth, as_binary)

    if self.output_mode == TFIDF:
      counts = math_ops.multiply(counts, self.tf_idf_weights)
    return counts
Пример #2
0
  def call(self, inputs):
    """Look up `inputs` in the vocabulary and encode per `self.output_mode`.

    Accepts dense, sparse, or ragged tensors (lists/tuples/ndarrays are
    converted to tensors first). Returns vocabulary indices for INT mode,
    otherwise a bincount encoding, optionally TF-IDF weighted.
    """
    # Plain Python / NumPy containers need conversion before `.dtype` access.
    if isinstance(inputs, (list, tuple, np.ndarray)):
      inputs = ops.convert_to_tensor_v2_with_dispatch(inputs)

    if not self.max_tokens and self._vocab_size is None:
      raise ValueError("You must set the layer's vocabulary before calling it. "
                       "Either pass a `vocabulary` argument to the layer, or "
                       "call `layer.adapt(dataset)` with some sample data.")
    self._called = True
    # Table keys may be int64 while callers pass int32; upcast to match.
    if self._key_dtype == dtypes.int64 and inputs.dtype == dtypes.int32:
      inputs = math_ops.cast(inputs, dtypes.int64)
    lookup_result = self._table_handler.lookup(inputs)

    lookup_checks = []

    # With no OOV slots (and not inverting), any miss (-1 from the table) is
    # a user error: build a graph assertion that fires with the offending
    # input values in its message.
    if self.num_oov_indices == 0 and not self.invert:
      # Pull out flat value tensors so the -1 comparison works uniformly for
      # sparse, ragged, and dense inputs.
      if tf_utils.is_sparse(inputs):
        lookup_values = lookup_result.values
        input_values = inputs.values
      elif tf_utils.is_ragged(inputs):
        lookup_values = lookup_result.flat_values
        input_values = inputs.flat_values
      else:
        lookup_values = lookup_result
        input_values = inputs
      oov_indices = array_ops.where_v2(math_ops.equal(lookup_values, -1))
      oov_inputs = array_ops.gather_nd(input_values, oov_indices)
      msg = string_ops.string_format(
          "When `num_oov_indices=0` all inputs should be in vocabulary, "
          "found OOV values {}, consider setting `num_oov_indices=1`.",
          (oov_inputs,))
      assertion = control_flow_ops.Assert(
          math_ops.equal(array_ops.size(oov_indices), 0), [msg])
      lookup_checks.append(assertion)

    # All outputs are produced under the OOV assertion so it cannot be
    # pruned from the graph; `identity` gives INT mode an op to hang the
    # control dependency on.
    with ops.control_dependencies(lookup_checks):
      if self.output_mode == INT:
        return array_ops.identity(lookup_result)

      multi_hot_output = (self.output_mode == MULTI_HOT)
      # Prefer the actual vocabulary size unless padding out to max_tokens.
      if self._vocab_size and not self.pad_to_max_tokens:
        out_depth = self._vocab_size
      else:
        out_depth = self.max_tokens
      if self.sparse:
        bincounts = category_encoding.sparse_bincount(lookup_result, out_depth,
                                                      multi_hot_output)
      else:
        bincounts = category_encoding.dense_bincount(lookup_result, out_depth,
                                                     multi_hot_output)

      if self.output_mode == TF_IDF:
        return math_ops.multiply(bincounts, self.tf_idf_weights)

      return bincounts
Пример #3
0
  def call(self, inputs):
    """Look up `inputs` and encode them according to `self.output_mode`."""
    if not self.max_tokens:
      raise ValueError("You must set the layer's vocabulary before calling it. "
                       "Either pass a `vocabulary` argument to the layer, or "
                       "call `layer.adapt(dataset)` with some sample data.")
    # Table keys may be int64 while callers pass int32; upcast to match.
    if self._key_dtype == dtypes.int64 and inputs.dtype == dtypes.int32:
      inputs = math_ops.cast(inputs, dtypes.int64)
    indices = self._table_handler.lookup(inputs)
    if self.output_mode == INT:
      return indices

    # Non-INT modes are bincount encodings; BINARY clamps counts to 0/1.
    count_as_binary = self.output_mode == BINARY
    encoder = (category_encoding.sparse_bincount if self.sparse
               else category_encoding.dense_bincount)
    return encoder(indices, self.max_tokens, count_as_binary)
    # NOTE(review): this def is indented one level deeper than `def call`
    # above, i.e. it is nested inside `call` AFTER both return statements,
    # so as written it is never executed or bound. It also uses 4-space
    # internal indentation unlike the 2-space style above — it looks like a
    # paste/concatenation artifact; confirm its intended location.
    def _encode_output(self, lookup_result):
        """Encode looked-up vocabulary indices per `self.output_mode`."""
        def expand_dims(inputs, axis):
            # Sparse tensors need the sparse-specific expand op.
            if tf_utils.is_sparse(inputs):
                return sparse_ops.sparse_expand_dims(inputs, axis)
            else:
                return array_ops.expand_dims(inputs, axis)

        original_shape = lookup_result.shape
        # In all cases, we should uprank scalar input to a single sample.
        if lookup_result.shape.rank == 0:
            lookup_result = expand_dims(lookup_result, -1)
        # One hot will unprank only if the final output dimension is not already 1.
        if self.output_mode == ONE_HOT:
            if lookup_result.shape[-1] != 1:
                lookup_result = expand_dims(lookup_result, -1)

        # TODO(b/190445202): remove output rank restriction.
        if lookup_result.shape.rank > 2:
            raise ValueError(
                "Received input shape {}, which would result in output rank {}. "
                "Currently only outputs up to rank 2 are supported for "
                "`output_mode={}`.".format(original_shape,
                                           lookup_result.shape.rank,
                                           self.output_mode))

        # MULTI_HOT and ONE_HOT clamp counts to 0/1; other modes keep counts.
        binary_output = self.output_mode in (MULTI_HOT, ONE_HOT)
        # Prefer the actual vocabulary size unless padding out to max_tokens.
        if self._vocab_size and not self.pad_to_max_tokens:
            out_depth = self._vocab_size
        else:
            out_depth = self.max_tokens
        if self.sparse:
            bincounts = category_encoding.sparse_bincount(
                lookup_result, out_depth, binary_output)
        else:
            bincounts = category_encoding.dense_bincount(
                lookup_result, out_depth, binary_output)

        if self.output_mode == TF_IDF:
            return math_ops.multiply(bincounts, self.tf_idf_weights)

        return bincounts