def analyzer_fn(inputs):
    """Runs the per-key quantiles analyzer and labels each of its outputs."""
    outputs = analyzers._quantiles_per_key(  # pylint: disable=protected-access
        inputs['x'], inputs['key'], num_buckets=3, epsilon=0.00001)
    output_names = (
        'key_vocab',
        'q_b',
        'scale_factor_per_key',
        'shift_per_key',
        'num_buckets',
    )
    return dict(zip(output_names, outputs))
Example #2
def bucketize_per_key(x, key, num_buckets, epsilon=None, name=None):
    """Returns a bucketized column, with a bucket index assigned to each input.

    Bucket boundaries are computed separately for each distinct value of
    `key`, so elements of `x` are ranked only against other elements sharing
    the same key.

    Args:
      x: A numeric input `Tensor` or `SparseTensor` with rank 1, whose values
        should be mapped to buckets.  `SparseTensor`s will have their
        non-missing values mapped and missing values left as missing.
      key: A Tensor with the same shape as `x` and dtype tf.string.  If `x` is
        a `SparseTensor`, `key` must exactly match `x` in everything except
        values, i.e. indices and dense_shape must be identical.
      num_buckets: Values in the input `x` are divided into approximately
        equal-sized buckets, where the number of buckets is num_buckets.
      epsilon: (Optional) Error tolerance for the approximate quantiles.
        Defaults to min(1/num_buckets, 0.01).  See `bucketize`.
      name: (Optional) A name for this operation.

    Returns:
      A `Tensor` of the same shape as `x`, with each element in the
      returned tensor representing the bucketized value. Bucketized value is
      in the range [0, actual_num_buckets).

    Raises:
      TypeError: If num_buckets is not an int.
      ValueError: If num_buckets is less than 1.
    """
    with tf.name_scope(name, 'bucketize_per_key'):
        # Validate eagerly in Python so the caller gets a clear error at
        # graph-construction time rather than a downstream analyzer failure.
        if not isinstance(num_buckets, int):
            raise TypeError('num_buckets must be an int, got {}'.format(
                type(num_buckets)))

        if num_buckets < 1:
            raise ValueError('Invalid num_buckets {}'.format(num_buckets))

        if epsilon is None:
            # A per-bucket error larger than a bucket's own width would make
            # the boundaries meaningless, so cap the tolerance at
            # 1/num_buckets, and never looser than 0.01.
            epsilon = min(1.0 / num_buckets, 0.01)

        # The analyzer operates on flat value vectors, so for sparse inputs
        # pass only the present (non-missing) values.
        key_vocab, bucket_boundaries = analyzers._quantiles_per_key(  # pylint: disable=protected-access
            x.values if isinstance(x, tf.SparseTensor) else x,
            key.values if isinstance(key, tf.SparseTensor) else key,
            num_buckets, epsilon)
        return _apply_buckets_with_keys(x, key, key_vocab, bucket_boundaries)