Example No. 1
def _reduce_batch_weighted_counts(x):
    # With no weights supplied, only unique_x is populated; the weighted
    # outputs are expected to be None.
    (unique_x, summed_weights_per_x, summed_positive_per_x_and_y,
     counts_per_x) = tf_utils.reduce_batch_weighted_counts(x)
    self.assertIsNone(summed_weights_per_x)
    self.assertIsNone(summed_positive_per_x_and_y)
    self.assertIsNone(counts_per_x)
    return unique_x
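
For context, a minimal standalone sketch of the unweighted call is shown below. It assumes the `tf_utils` module is importable from `tensorflow_transform` and that eager execution is enabled; the input values are illustrative and not taken from the original test.

import tensorflow as tf
from tensorflow_transform import tf_utils  # assumed import path

# Illustrative input; not from the original test.
x = tf.constant([b'hello', b'world', b'hello'])

reduced = tf_utils.reduce_batch_weighted_counts(x)
# As the assertions above show, only unique_x is populated when no
# weights are passed; the other three fields come back as None.
print(reduced.unique_x)
print(reduced.summed_weights_per_x, reduced.summed_positive_per_x_and_y,
      reduced.counts_per_x)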
Example No. 2
def _get_approximate_vocabulary_analyzer_inputs(
    x: common_types.TensorType,
    file_format: common_types.VocabularyFileFormatType,
    weights: Optional[common_types.TensorType] = None,
) -> Tuple[common_types.TensorType, common_types.TensorType]:
    """Helper for constructing approximate vocabulary inputs from tensors.

  Args:
    x: `Tensor` or `CompositeTensor` to compute vocabulary over.
    file_format: The format of the resulting vocabulary file.
      'tfrecord_gzip' requires tensorflow>=2.4.
    weights: Optional `Tensor` of weights.

  Returns:
    A list of batch-reduced `Tensor`s to feed to vocabulary analysis.
  """
    filter_regex = analyzers.get_vocab_newline_characters_regex(
        x.dtype, file_format)
    reduced_batch = tf_utils.reduce_batch_weighted_counts(
        x, weights=weights, force=True, filter_regex=filter_regex)
    assert reduced_batch.summed_positive_per_x_and_y is None
    if weights is None:
        assert reduced_batch.summed_weights_per_x is None
        return (reduced_batch.unique_x, reduced_batch.counts_per_x)
    else:
        return (reduced_batch.unique_x, reduced_batch.summed_weights_per_x)
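
A hedged usage sketch of the two branches the helper above relies on follows; the example tokens and weights are assumptions and not part of the original snippet.

import tensorflow as tf
from tensorflow_transform import tf_utils  # assumed import path

tokens = tf.constant([b'a', b'b', b'a', b'c'])   # illustrative values
weights = tf.constant([1.0, 2.0, 3.0, 0.5])      # illustrative values

# Unweighted branch: with force=True (as in the helper above),
# counts_per_x is computed alongside unique_x.
unweighted = tf_utils.reduce_batch_weighted_counts(tokens, force=True)
print(unweighted.unique_x, unweighted.counts_per_x)

# Weighted branch: summed_weights_per_x is what the helper forwards to
# the vocabulary analysis instead of raw counts.
weighted = tf_utils.reduce_batch_weighted_counts(tokens, weights=weights)
print(weighted.unique_x, weighted.summed_weights_per_x)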
Example No. 3
    def test_reduce_batch_weighted_counts(self, x, weights, expected_results):
        x = tf.constant(x)
        if weights is not None:
            weights = tf.constant(weights)

        returned_tensors = tf_utils.reduce_batch_weighted_counts(x, weights)
        # Evaluate in TF1 graph mode: outputs that were not computed come
        # back as None and are skipped before comparing against expectations.
        with tf.compat.v1.Session() as sess:
            results = sess.run([a for a in returned_tensors if a is not None])
            for result, expected in zip(results, expected_results):
                self.assertAllEqual(result, np.array(expected))
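
For comparison, a minimal eager-mode (TF2) sketch of the same evaluation without a tf.compat.v1.Session is given below; the inputs are illustrative and no specific expected values are asserted.

import tensorflow as tf
from tensorflow_transform import tf_utils  # assumed import path

x = tf.constant([b'a', b'b', b'a'])        # illustrative values
weights = tf.constant([1.0, 2.0, 3.0])     # illustrative values

returned_tensors = tf_utils.reduce_batch_weighted_counts(x, weights)
# In eager mode the non-None outputs can be materialized directly with
# .numpy() rather than running them through a session.
results = [t.numpy() for t in returned_tensors if t is not None]
for result in results:
    print(result)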