def testRaggedMapOnStructure_RaggedOutputs(self):
    """Maps a dict-returning fn over a dict of ragged tensors.

    Every value in both inputs is incremented by one, and both outputs are
    declared as int32 ragged tensors with ragged_rank=1.
    """
    base = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
    # Ten times `base`: [[10, 20, 30], [40], [50, 60, 70]]
    scaled = ragged_functional_ops.map_flat_values(mo.multiply, base, 10)

    def _add_one(row):
        bumped_batman = row['batman'] + 1
        bumped_robin = row['robin'] + 1
        return {'batman': bumped_batman, 'robin': bumped_robin}

    output_types = {
        'batman': ragged_tensor.RaggedTensorType(
            dtype=dtypes.int32, ragged_rank=1),
        'robin': ragged_tensor.RaggedTensorType(
            dtype=dtypes.int32, ragged_rank=1),
    }
    result = ragged_map_ops.map_fn(
        fn=_add_one,
        elems={'batman': base, 'robin': scaled},
        infer_shape=False,
        dtype=output_types,
    )

    expected_batman = [[2, 3, 4], [5], [6, 7, 8]]
    expected_robin = [[11, 21, 31], [41], [51, 61, 71]]
    self.assertRaggedEqual(result['batman'], expected_batman)
    self.assertRaggedEqual(result['robin'], expected_robin)
Exemplo n.º 2
0
    def testRaggedMapOnStructure_RaggedOutputs(self):
        """Checks map_fn over a dict of ragged inputs with ragged dict outputs.

        The mapped function adds one to each entry; the expected results are
        the two inputs with every value incremented.
        """
        original = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
        # `original` multiplied by 10: [[10, 20, 30], [40], [50, 60, 70]]
        tenfold = ragged_functional_ops.map_flat_values(
            mo.multiply, original, 10)

        def _plus_one(entry):
            return {key: entry[key] + 1 for key in ('batman', 'robin')}

        mapped = ragged_map_ops.map_fn(
            fn=_plus_one,
            elems={'batman': original, 'robin': tenfold},
            infer_shape=False,
            dtype={
                'batman': ragged_tensor.RaggedTensorType(
                    dtype=dtypes.int32, ragged_rank=1),
                'robin': ragged_tensor.RaggedTensorType(
                    dtype=dtypes.int32, ragged_rank=1),
            },
        )

        self.assertRaggedEqual(
            mapped['batman'], [[2, 3, 4], [5], [6, 7, 8]])
        self.assertRaggedEqual(
            mapped['robin'], [[11, 21, 31], [41], [51, 61, 71]])
Exemplo n.º 3
0
 def testMismatchRaggedRank(self):
     """Expects a ValueError when the declared ragged_rank is 23.

     The mapped function reduces each element with `reduce_sum(axis=0)`;
     the error message must match the "Expected `fn` to return ... But it
     returned ..." pattern.
     """
     nested = ragged_factory_ops.constant([[[1, 2, 3]], [[4, 5], [6, 7]]])

     def _sum_over_rows(x):
         return ragged_math_ops.reduce_sum(x, axis=0)

     message_pattern = r'(?s)Expected `fn` to return.*But it returned.*'
     with self.assertRaisesRegex(ValueError, message_pattern):
         declared = ragged_tensor.RaggedTensorType(
             dtype=dtypes.int64, ragged_rank=23)
         _ = ragged_map_ops.map_fn(_sum_over_rows, nested, dtype=declared)
Exemplo n.º 4
0
 def testMismatchRaggedRank2(self):
     """Expects a ValueError when the declared ragged_rank is 10.

     The mapped function wraps each element with `from_row_starts(x, [0])`;
     the error message must match the "Expected `fn` to return ... But it
     returned ..." pattern.
     """
     rows = ragged_factory_ops.constant([[1, 2, 3], [4, 5], [6, 7]])

     def _wrap_as_ragged(x):
         return ragged_tensor.RaggedTensor.from_row_starts(x, [0])

     message_pattern = r'(?s)Expected `fn` to return.*But it returned.*'
     with self.assertRaisesRegex(ValueError, message_pattern):
         declared = ragged_tensor.RaggedTensorType(
             dtype=dtypes.int64, ragged_rank=10)
         _ = ragged_map_ops.map_fn(_wrap_as_ragged, rows, dtype=declared)
 def testMismatchRaggedRank2(self):
   """Expects the exact rank-mismatch message for a declared ragged_rank of 10.

   The mapped function wraps each element with `from_row_starts(x, [0])`.
   """
   rows = ragged_factory_ops.constant([[1, 2, 3], [4, 5], [6, 7]])

   def _wrap_as_ragged(x):
     return ragged_tensor.RaggedTensor.from_row_starts(x, [0])

   expected_message = (
       r'The declared ragged rank (10) mismatches the result (2)')
   with self.assertRaisesWithLiteralMatch(ValueError, expected_message):
     declared = ragged_tensor.RaggedTensorType(
         dtype=dtypes.int64, ragged_rank=10)
     _ = ragged_map_ops.map_fn(_wrap_as_ragged, rows, dtype=declared)
Exemplo n.º 6
0
 def testMismatchRaggedRank(self):
     """Expects the exact rank-mismatch message for a declared ragged_rank of 23.

     The mapped function reduces each element with `reduce_sum(axis=0)`.
     """
     nested = ragged_factory_ops.constant([[[1, 2, 3]], [[4, 5], [6, 7]]])

     def _sum_over_rows(x):
         return ragged_math_ops.reduce_sum(x, axis=0)

     expected_message = (
         r'The declared ragged rank (23) mismatches the result (1)')
     with self.assertRaisesWithLiteralMatch(ValueError, expected_message):
         declared = ragged_tensor.RaggedTensorType(
             dtype=dtypes.int64, ragged_rank=23)
         _ = ragged_map_ops.map_fn(_sum_over_rows, nested, dtype=declared)
Exemplo n.º 7
0
 def testMismatchRaggedRank2(self):
     """Checks the literal error raised for a declared ragged_rank of 10.

     Each element is turned into a RaggedTensor via `from_row_starts(x, [0])`
     before being returned from the mapped function.
     """
     source = ragged_factory_ops.constant([[1, 2, 3], [4, 5], [6, 7]])

     def _to_single_row(x):
         return ragged_tensor.RaggedTensor.from_row_starts(x, [0])

     literal = r'The declared ragged rank (10) mismatches the result (2)'
     with self.assertRaisesWithLiteralMatch(ValueError, literal):
         out_type = ragged_tensor.RaggedTensorType(
             dtype=dtypes.int64, ragged_rank=10)
         _ = ragged_map_ops.map_fn(_to_single_row, source, dtype=out_type)
 def testMismatchRaggedRank(self):
   """Checks the literal error raised for a declared ragged_rank of 23.

   Each element is summed along axis 0 by the mapped function.
   """
   source = ragged_factory_ops.constant([[[1, 2, 3]], [[4, 5], [6, 7]]])

   def _column_sums(x):
     return ragged_math_ops.reduce_sum(x, axis=0)

   literal = r'The declared ragged rank (23) mismatches the result (1)'
   with self.assertRaisesWithLiteralMatch(ValueError, literal):
     out_type = ragged_tensor.RaggedTensorType(
         dtype=dtypes.int64, ragged_rank=23)
     _ = ragged_map_ops.map_fn(_column_sums, source, dtype=out_type)
Exemplo n.º 9
0
 def testMapOnSparseTensor(self):
   """Identity-maps a RaggedTensor that was built from a SparseTensor."""
   sparse = sparse_tensor.SparseTensor(
       indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
       values=[0, 5, 0, 4],
       dense_shape=[2, 2],
   )
   ragged = ragged_tensor.RaggedTensor.from_sparse(sparse)

   def _identity(row):
     return row

   mapped = ragged_map_ops.map_fn(_identity, ragged)
   self.assertRaggedEqual(mapped, [[0, 5], [0, 4]])
Exemplo n.º 10
0
 def testMapOnSparseTensor(self):
   """Round-trips a fully populated 2x2 SparseTensor through map_fn."""
   coords = [[0, 0], [0, 1], [1, 0], [1, 1]]
   sparse_input = sparse_tensor.SparseTensor(
       indices=coords, values=[0, 5, 0, 4], dense_shape=[2, 2])
   as_ragged = ragged_tensor.RaggedTensor.from_sparse(sparse_input)

   def _passthrough(x):
     return x

   unchanged = ragged_map_ops.map_fn(_passthrough, as_ragged)
   self.assertRaggedEqual(unchanged, [[0, 5], [0, 4]])
Exemplo n.º 11
0
  def getTokenWord(self, text, token_starts, token_ends):
    """Slices token substrings out of `text` using start and end offsets.

    Args:
      text: Strings to slice; mapped over in lockstep with the offsets.
      token_starts: Start offset of each token.
      token_ends: End offset of each token. The length handed to
        `tf.strings.substr` is `token_ends - token_starts`.

    Returns:
      The `map_fn` result, declared as a string ragged tensor with
      ragged_rank=1.
    """
    def _slice_tokens(packed):
      source, starts, lengths = packed
      return tf.strings.substr(source, starts, lengths)

    return ragged_map_ops.map_fn(
        _slice_tokens,
        (text, token_starts, token_ends - token_starts),
        dtype=ragged_tensor.RaggedTensorType(dtype=tf.string, ragged_rank=1),
        infer_shape=False)
    def break_sentences_with_offsets(self, doc):
        """Splits `doc` into sentence fragments with their byte offsets.

        Example:
                          1                  1         2         3
                012345678901234    01234567890123456789012345678901234567
          doc: 'Hello...foo bar', 'Welcome to the U.S. don't be surprised'

          fragment_text: [['Hello...', 'foo bar'],
                          ['Welcome to the U.S.', 'don't be surprised']]
          start: [[0, 8], [0, 20]]
          end: [[8, 15], [19, 38]]

        Args:
          doc: A string `Tensor` of shape [batch] with a batch of documents.
            Inputs with more than one dimension are flattened first.

        Returns:
          A tuple of (fragment_text, start, end) where:

          fragment_text: A string `RaggedTensor` of shape
            [batch, (num_sentences)] with each input broken up into its
            constituent sentence fragments.
          start: A int64 `RaggedTensor` of shape [batch, (num_sentences)]
            where each entry is the inclusive beginning byte offset of a
            sentence.
          end: A int64 `RaggedTensor` of shape [batch, (num_sentences)]
            where each entry is the exclusive ending byte offset of a
            sentence.
        """
        if doc.shape.ndims > 1:
            # Collapse higher-rank input down to its flat values.
            doc = ragged_tensor.RaggedTensor.from_tensor(doc).flat_values

        # Run sentence fragmenter op v2.
        (start, end, properties, terminal_punc_token,
         row_lengths) = gen_state_based_sentence_breaker_op.sentence_fragments_v2(
             doc)

        def _pack(values):
            # Group the flat op output into one row per input document.
            return ragged_tensor.RaggedTensor.from_row_lengths(
                values, row_lengths)

        # Pack every per-fragment output, including the two that are not
        # returned, in the same order as the op produced them.
        start = _pack(start)
        end = _pack(end)
        properties = _pack(properties)
        terminal_punc_token = _pack(terminal_punc_token)

        # map_fn passes a single argument, so unpack the tuple by hand.
        def _slice_fragment(packed):
            source, offset, size = packed
            return string_ops.substr(source, offset, size)

        # Extract fragment text using offsets.
        fragment_lengths = math_ops.subtract(end, start)
        fragment_text = ragged_map_ops.map_fn(
            _slice_fragment,
            (doc, start, fragment_lengths),
            infer_shape=False,
            dtype=dtypes.string)

        return fragment_text, start, end
Exemplo n.º 13
0
  def testRaggedMapOnStructure(self):
    """Maps over a dict of ragged tensors, reducing each row pair to a sum."""
    base = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
    # Ten times `base`: [[10, 20, 30], [40], [50, 60, 70]]
    scaled = ragged_functional_ops.map_flat_values(mo.multiply, base, 10)

    def _row_total(row):
      batman_sum = mo.reduce_sum(row['batman'])
      robin_sum = mo.reduce_sum(row['robin'])
      return batman_sum + robin_sum

    totals = ragged_map_ops.map_fn(
        fn=_row_total,
        elems={'batman': base, 'robin': scaled},
        dtype=dtypes.int32,
    )

    self.assertRaggedEqual(totals, [66, 44, 198])
Exemplo n.º 14
0
    def testRaggedMapOnStructure(self):
        """Sums both entries of a ragged dict row by row via map_fn."""
        original = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
        # `original` multiplied by 10: [[10, 20, 30], [40], [50, 60, 70]]
        tenfold = ragged_functional_ops.map_flat_values(
            mo.multiply, original, 10)

        def _combined_sum(entry):
            batman_total = mo.reduce_sum(entry['batman'])
            robin_total = mo.reduce_sum(entry['robin'])
            return batman_total + robin_total

        per_row = ragged_map_ops.map_fn(
            fn=_combined_sum,
            elems={'batman': original, 'robin': tenfold},
            dtype=dtypes.int32,
        )

        self.assertAllEqual(per_row, [66, 44, 198])
def _ragged_set_op(set_op,
                   pred_begin,
                   pred_end,
                   gold_begin,
                   gold_end,
                   pred_label=None,
                   gold_label=None):
    """Applies a tf.sets op per batch row to predicted and gold spans.

    `set_op` is bound to `_per_batch_set_op` and mapped over the rows of the
    six span tensors; the mapped result is returned as-is.

    Args:
      set_op: A callable tf.sets operation.
      pred_begin: A `RaggedTensor` of shape [batch, (num_spans)] containing
        the beginning indices of a prediction span.
      pred_end: A `RaggedTensor` of shape [batch, (num_spans)] containing the
        ending indices of a prediction span.
      gold_begin: A `RaggedTensor` of shape [batch, (num_spans)] containing
        the beginning indices of a gold labelled span.
      gold_end: A `RaggedTensor` of shape [batch, (num_spans)] containing the
        ending indices of a gold labelled span.
      pred_label: (optional) A `RaggedTensor` of shape [batch, (num_spans)]
        containing the prediction label types. When omitted, an all-zero
        label tensor shaped like `pred_begin` is used, i.e. every span is
        treated as the same type.
      gold_label: (optional) A `RaggedTensor` of shape [batch, (num_spans)]
        containing the gold label types. When omitted, an all-zero label
        tensor shaped like `gold_begin` is used.

    Returns:
      A 1-D Tensor containing the number of elements in the results of
      `set_op`.
    """
    def _uniform_labels(spans):
        # Same row structure as `spans`, with every value set to zero.
        return spans.with_flat_values(array_ops.zeros_like(spans.flat_values))

    per_row_op = functools.partial(_per_batch_set_op, set_op)
    if pred_label is None:
        pred_label = _uniform_labels(pred_begin)
    if gold_label is None:
        gold_label = _uniform_labels(gold_begin)

    span_inputs = (
        pred_begin, pred_end, pred_label, gold_begin, gold_end, gold_label)
    return ragged_map_ops.map_fn(
        per_row_op, span_inputs, dtype=dtypes.int32, infer_shape=False)
Exemplo n.º 16
0
  def testZip(self):
    """Pairs every value of a ragged tensor with the index of its row."""
    values = ragged_factory_ops.constant(
        [[10, 20], [30, 40], [50, 60], [70], [80, 90, 100]], dtypes.int64)
    row_count = values.nrows(out_type=dtypes.int64)
    row_ids = array_ops.expand_dims(mo.range(row_count), axis=1)

    def _pair_with_row_id(packed):
      row_id, row_values = packed
      # Repeat the row id once per value so the two can be stacked.
      repeated_id = backend.tile(row_id, array_ops.shape(row_values))
      return array_ops.stack([repeated_id, row_values], axis=1)

    paired = ragged_map_ops.map_fn(
        _pair_with_row_id,
        (row_ids, values),
        dtype=ragged_tensor.RaggedTensorType(dtype=dtypes.int64, ragged_rank=1),
        infer_shape=False)

    expected = [
        [[0, 10], [0, 20]],
        [[1, 30], [1, 40]],
        [[2, 50], [2, 60]],
        [[3, 70]],
        [[4, 80], [4, 90], [4, 100]],
    ]
    self.assertRaggedEqual(paired, expected)
Exemplo n.º 17
0
  def testZip(self):
    """Zips each ragged row's values with that row's index."""
    data = ragged_factory_ops.constant(
        [[10, 20], [30, 40], [50, 60], [70], [80, 90, 100]], dtypes.int64)
    indices = mo.range(data.nrows(), dtype=dtypes.int64)
    index_column = array_ops.expand_dims(indices, axis=1)

    def _attach_index(pair):
      index, row = pair
      # One copy of the index per value in the row.
      tiled_index = backend.tile(index, array_ops.shape(row))
      return array_ops.stack([tiled_index, row], axis=1)

    zipped = ragged_map_ops.map_fn(
        _attach_index,
        (index_column, data),
        dtype=ragged_tensor.RaggedTensorType(dtype=dtypes.int64, ragged_rank=1),
        infer_shape=False)

    expected = [
        [[0, 10], [0, 20]],
        [[1, 30], [1, 40]],
        [[2, 50], [2, 60]],
        [[3, 70]],
        [[4, 80], [4, 90], [4, 100]],
    ]
    self.assertRaggedEqual(zipped, expected)
Exemplo n.º 18
0
  def testRaggedMap(
      self,
      fn,
      elems,
      expected_output,
      expected_ragged_rank=None,
      result_ragged_rank=None,
      elems_ragged_rank=None,
      dtype=dtypes.int64,
      result_dtype=None,
      infer_shape=False,
  ):
    """Runs `map_fn` on ragged `elems` and compares against the expectation.

    Args:
      fn: Function mapped over `elems`.
      elems: Nested list converted to a ragged constant.
      expected_output: Nested list converted to the expected ragged constant.
      expected_ragged_rank: `ragged_rank` used when building the expectation.
      result_ragged_rank: Accepted but not used by this body.
      elems_ragged_rank: `ragged_rank` used when building `elems`.
      dtype: dtype used when building `elems`.
      result_dtype: Forwarded to `map_fn` as its `dtype` argument.
      infer_shape: Forwarded to `map_fn`.
    """
    ragged_elems = ragged_factory_ops.constant(elems, dtype, elems_ragged_rank)
    actual = ragged_map_ops.map_fn(
        fn=fn,
        elems=ragged_elems,
        dtype=result_dtype,
        infer_shape=infer_shape,
    )

    expected = ragged_factory_ops.constant(
        expected_output, ragged_rank=expected_ragged_rank)
    self.assertRaggedEqual(expected, actual)
Exemplo n.º 19
0
    def testBatchGather(self):
        """Gathers tokens row by row using a parallel ragged index tensor."""
        words = ragged_factory_ops.constant([
            ['hello', '.', 'there'],
            ['merhaba'],
            ['bonjour', '.', 'ca va', '?'],
        ])
        positions = ragged_factory_ops.constant([[0, 2], [0], [0, 2]])

        def _gather_row(pair):
            row_words, row_positions = pair
            return array_ops.gather(row_words, row_positions)

        string_rows = ragged_tensor.RaggedTensorType(
            dtype=dtypes.string, ragged_rank=1)
        gathered = ragged_map_ops.map_fn(
            _gather_row,
            (words, positions),
            dtype=string_rows,
            infer_shape=False)

        expected = [[b'hello', b'there'], [b'merhaba'], [b'bonjour', b'ca va']]
        self.assertAllEqual(gathered, expected)
Exemplo n.º 20
0
  def testRaggedMap(
      self,
      fn,
      elems,
      expected_output,
      expected_ragged_rank=None,
      result_ragged_rank=None,
      elems_ragged_rank=None,
      dtype=dtypes.int64,
      result_dtype=None,
      infer_shape=False,
  ):
    """Maps `fn` over a ragged constant and checks the ragged result.

    Args:
      fn: Function handed to `map_fn`.
      elems: Nested list turned into the ragged input.
      expected_output: Nested list turned into the ragged expectation.
      expected_ragged_rank: `ragged_rank` of the expectation constant.
      result_ragged_rank: Part of the signature; this body does not read it.
      elems_ragged_rank: `ragged_rank` of the input constant.
      dtype: dtype of the input constant.
      result_dtype: Passed to `map_fn` as `dtype`.
      infer_shape: Passed to `map_fn` unchanged.
    """
    inputs = ragged_factory_ops.constant(elems, dtype, elems_ragged_rank)
    mapped = ragged_map_ops.map_fn(
        fn=fn, elems=inputs, dtype=result_dtype, infer_shape=infer_shape)

    wanted = ragged_factory_ops.constant(
        expected_output, ragged_rank=expected_ragged_rank)
    self.assertRaggedEqual(wanted, mapped)
Exemplo n.º 21
0
  def testBatchGather(self):
    """Selects tokens from each row at the positions given for that row."""
    token_rows = ragged_factory_ops.constant([
        ['hello', '.', 'there'],
        ['merhaba'],
        ['bonjour', '.', 'ca va', '?'],
    ])
    index_rows = ragged_factory_ops.constant([[0, 2], [0], [0, 2]])

    def _pick(pair):
      row_tokens, row_indices = pair
      return array_ops.gather(row_tokens, row_indices)

    picked = ragged_map_ops.map_fn(
        _pick,
        (token_rows, index_rows),
        dtype=ragged_tensor.RaggedTensorType(
            dtype=dtypes.string, ragged_rank=1),
        infer_shape=False)

    expected = [[b'hello', b'there'], [b'merhaba'], [b'bonjour', b'ca va']]
    self.assertRaggedEqual(picked, expected)
Exemplo n.º 22
0
    def get_segments(self, sentences):
        """Builds (sentence, candidate next sentence, label) triples.

        Args:
          sentences: A `RaggedTensor` of strings w/ shape
            [batch, (num_sentences)].

        Returns:
          A tuple of (segment_a, segment_b, is_next_sentence) where:

          segment_a: `sentences` with its last two dimensions merged, i.e.
            all the original sentences.
          segment_b: Same layout as `segment_a`; each entry is either the
            following sentence from the same row (rows are rolled by -1, so
            the last sentence wraps around to the first) or an entry taken
            from the shuffled sentences.
          is_next_sentence: Same layout as `segment_a`, cast to int64; 1
            where `segment_b` holds the rolled next sentence and 0 where the
            shuffled sentence was used instead.
        """
        # Shift every row left by one so position i holds sentence i + 1.
        shift_left = functools.partial(manip_ops.roll, axis=0, shift=-1)
        following = ragged_map_ops.map_fn(
            shift_left,
            sentences,
            dtype=ragged_tensor.RaggedTensorType(dtypes.string, 1),
            infer_shape=False)

        flat_sentences = sentences.flat_values
        shuffled = sentences.with_flat_values(self._shuffle_fn(flat_sentences))

        # Draw one value per sentence; entries above the threshold keep the
        # true next sentence.
        draws = self._random_fn(sentences.flat_values.shape)
        keep_next = sentences.with_flat_values(
            draws > self._random_next_sentence_threshold)

        # Randomly decide if we should use next sentence or throw in a
        # random sentence.
        second_segment = ragged_where_op.where(
            keep_next, x=following, y=shuffled)

        # Get rid of the docs dimensions.
        merged_sentences = sentences.merge_dims(-2, -1)
        merged_second = second_segment.merge_dims(-2, -1)
        merged_labels = math_ops.cast(
            keep_next.merge_dims(-2, -1), dtypes.int64)
        return merged_sentences, merged_second, merged_labels
def _ragged_substr(text_input, begin, end):
  """Extracts substrings of `text_input` delimited by `begin` and `end`.

  Each input string is repeated once per offset in its `begin` row, then
  `substr_v2` slices every copy using the flattened offsets.

  Args:
    text_input: Strings to slice; if ragged, only its flat values are used.
    begin: Ragged start offsets. Each element mapped over must itself be
      ragged, since `row_lengths()` is called on it.
    end: Ragged end offsets; only its flat values are read.

  Returns:
    A tensor with the row structure of `begin` whose flat values are the
    extracted substrings.
  """
  flat_text = (
      text_input.flat_values
      if ragged_tensor.is_ragged(text_input) else text_input)

  def _repeat_per_offset(packed):
    source, offsets = packed
    # Total number of offsets attached to this string.
    copies = math_ops.reduce_sum(offsets.row_lengths())
    return array_ops.tile([source], [copies])

  tiled_text = ragged_map_ops.map_fn(
      _repeat_per_offset,
      (flat_text, begin),
      dtype=ragged_tensor.RaggedTensorType(dtype=dtypes.string, ragged_rank=1),
      infer_shape=False,
  )
  lengths = math_ops.sub(
      array_ops.squeeze(end.flat_values), array_ops.squeeze(begin.flat_values))
  substrings = string_ops.substr_v2(
      tiled_text, array_ops.squeeze(begin.flat_values), lengths)
  return begin.with_flat_values(substrings.flat_values)
Exemplo n.º 24
0
 def testDefaultAttrValues(self):
     """Calls map_fn with only fn, elems and dtype; nothing is asserted."""
     single_value = ragged_factory_ops.constant([[7]])
     out_type = ragged_tensor.RaggedTensorType(
         dtype=dtypes.int32, ragged_rank=1)

     def _identity(x):
         return x

     ragged_map_ops.map_fn(fn=_identity, elems=single_value, dtype=out_type)