Exemplo n.º 1
0
    def test_none(self):
        """Scalar '123' with n-gram sizes 4..5 produces an empty result."""
        ngrams = char_ngrams('123', 4, 5, itself='ASIS')

        # A scalar input is expected to yield a plain (non-ragged) tensor.
        self.assertTrue(tf.is_tensor(ngrams))
        self.assertNotIsInstance(ngrams, tf.RaggedTensor)

        values = self.evaluate(ngrams).tolist()
        self.assertAllEqual([], values)
Exemplo n.º 2
0
    def test_alone_inside(self):
        """With itself='ALONE' and sizes 2..3, '123' yields only bigrams.

        The full word '123' is not expected among the produced n-grams.
        """
        ngrams = char_ngrams('123', 2, 3, itself='ALONE')

        # A scalar input is expected to yield a plain (non-ragged) tensor.
        self.assertTrue(tf.is_tensor(ngrams))
        self.assertNotIsInstance(ngrams, tf.RaggedTensor)

        values = self.evaluate(ngrams).tolist()
        self.assertAllEqual([b'12', b'23'], values)
Exemplo n.º 3
0
    def test_as_is_below(self):
        """With itself='ASIS', '1234' yields all bigrams, then all trigrams."""
        ngrams = char_ngrams('1234', 2, 3, itself='ASIS')

        # A scalar input is expected to yield a plain (non-ragged) tensor.
        self.assertTrue(tf.is_tensor(ngrams))
        self.assertNotIsInstance(ngrams, tf.RaggedTensor)

        bigrams = [b'12', b'23', b'34']
        trigrams = [b'123', b'234']
        values = self.evaluate(ngrams).tolist()
        self.assertAllEqual(bigrams + trigrams, values)
Exemplo n.º 4
0
    def test_default_2d(self):
        """A 2-D list input gives a RaggedTensor of per-word unigrams.

        The ragged result is padded with '' so it can be compared against
        a dense expected tensor.
        """
        want = tf.constant([[['x', 'y'], ['x', '']]], dtype=tf.string)

        ngrams = char_ngrams([['xy', 'x']], 1, 1, itself='ASIS')
        self.assertIsInstance(ngrams, tf.RaggedTensor)

        # Pad shorter rows with empty strings to obtain a dense tensor.
        dense = ngrams.to_tensor(default_value='')

        want_value, got_value = self.evaluate([want, dense])
        self.assertAllEqual(want_value, got_value)
Exemplo n.º 5
0
    def test_0d(self):
        """Scalar 'xy' with size 1..1 and itself='NEVER' gives ['x', 'y']."""
        want = tf.constant(['x', 'y'], dtype=tf.string)

        ngrams = char_ngrams('xy', 1, 1, itself='NEVER')

        # A scalar input is expected to yield a plain (non-ragged) tensor.
        self.assertTrue(tf.is_tensor(ngrams))
        self.assertNotIsInstance(ngrams, tf.RaggedTensor)

        want_value, got_value = self.evaluate([want, ngrams])
        self.assertAllEqual(want_value, got_value)
Exemplo n.º 6
0
    def test_inference_shape(self):
        """Static shape of the result for a 2x3 input is [2, 3, None]."""
        words = [['1', '2', '3'], ['4', '5', '6']]

        ngrams = char_ngrams(words, 1, 1, itself='ALWAYS')
        static_shape = ngrams.shape.as_list()

        # The trailing (n-gram) dimension is expected to be unknown.
        self.assertEqual([2, 3, None], static_shape)
Exemplo n.º 7
0
    def test_actual_shape(self):
        """Evaluated, '' padded result for a 2x3 input has shape (2, 3, 1)."""
        words = [['1', '2', '3'], ['4', '5', '6']]

        ngrams = char_ngrams(words, 1, 1, itself='ALWAYS')
        self.assertIsInstance(ngrams, tf.RaggedTensor)

        # Densify before evaluation so the concrete shape can be inspected.
        dense = ngrams.to_tensor(default_value='')
        dense_value = self.evaluate(dense)

        self.assertAllEqual((2, 3, 1), dense_value.shape)
Exemplo n.º 8
0
def ngram_features(input_words, minn, maxn):
    """Preprocess words and split them into character n-grams.

    The words go through these steps in order: NFKC unicode normalization,
    removal of a fixed set of accent marks, `lower_case`, `zero_digits`,
    wrapping with '<' and '>', and finally `char_ngrams`.

    Args:
        input_words: Words to process; passed to `normalize_unicode` first.
        minn: Forwarded to `char_ngrams` as its second argument
            (presumably the minimum n-gram length).
        maxn: Forwarded to `char_ngrams` as its third argument
            (presumably the maximum n-gram length).

    Returns:
        The result of `char_ngrams(..., itself='ALONE')` on the
        preprocessed words.
    """
    # Grave and acute accent marks: spacing, modifier-letter and combining
    # forms. The ' \u0301' entry (space + combining acute) is presumably
    # the NFKC form of the standalone acute accent U+00B4. It is listed
    # before the bare '\u0301', which looks deliberate if replacements are
    # applied in order (TODO confirm against replace_string).
    accent_marks = [
        u'\u0060', u' \u0301', u'\u02CA', u'\u02CB', u'\u0300', u'\u0301',
    ]

    words = normalize_unicode(input_words, 'NFKC')
    words = replace_string(words, accent_marks, [''] * len(accent_marks))
    words = wrap_with(zero_digits(lower_case(words)), '<', '>')

    return char_ngrams(words, minn, maxn, itself='ALONE')
Exemplo n.º 9
0
    def test_ragged(self):
        """A ragged input of words yields ragged 1..2-character n-grams.

        The result is padded with '' so it can be compared against a dense
        expected tensor.
        """
        want = tf.constant([
            [
                ['a', 'b', 'ab', '', ''],
                ['c', ' ', 'd', 'c ', ' d'],
            ],
            [
                ['e', '', '', '', ''],
                ['', '', '', '', ''],
            ],
        ])

        words = tf.ragged.constant([['ab', 'c d'], ['e']])
        ngrams = char_ngrams(words, 1, 2, itself='ASIS')
        self.assertIsInstance(ngrams, tf.RaggedTensor)

        # Pad missing words and n-grams with empty strings.
        dense = ngrams.to_tensor(default_value='')

        want_value, got_value = self.evaluate([want, dense])
        self.assertAllEqual(want_value, got_value)