def test_unicode(self):
        """Check replacement when needles and replacements are non-ASCII."""
        needles = [u'́', u'е', u'ост']
        substitutes = ['', u'о', u'остовый']
        actual = replace_string(u'т́ест', needles, substitutes)
        wanted = tf.convert_to_tensor(u'тостовый', dtype=tf.string)

        # Evaluate both tensors together so they are compared as plain values.
        wanted, actual = self.evaluate([wanted, actual])
        self.assertAllEqual(wanted, actual)
    def test_inference_shape(self):
        """Check the static shape reported for a 2x3 nested-list input."""
        rows = [['1', '2', '3'], ['4', '5', '6']]
        replaced = replace_string(rows, ['<'], ['>'])

        # Only the static shape is inspected here; nothing is evaluated.
        static_shape = replaced.shape.as_list()
        self.assertAllEqual([2, 3], static_shape)
    def test_ragged(self):
        """Check that a ragged input is accepted and yields a ragged result."""
        ragged_in = tf.ragged.constant([['<test', 'test>'], ['test']])
        dense_wanted = tf.constant([['>test', 'test>'], ['test', '']])
        replaced = replace_string(ragged_in, ['<'], ['>'])
        # Densify with '' padding so the result can be compared to a
        # regular tensor.
        dense_actual = replaced.to_tensor(default_value='')

        dense_wanted, dense_actual = self.evaluate(
            [dense_wanted, dense_actual])
        self.assertAllEqual(dense_wanted, dense_actual)
    def test_actual_shape(self):
        """Check the evaluated (dynamic) shape for a 2x3 nested-list input."""
        rows = [['1', '2', '3'], ['4', '5', '6']]
        replaced = replace_string(rows, ['<'], ['>'])
        shape_op = tf.shape(replaced)

        shape_value = self.evaluate(shape_op)
        self.assertAllEqual([2, 3], shape_value.tolist())
Exemplo n.º 5
0
def ngram_features(input_words, minn, maxn):
    """Preprocess words and expand them into character n-grams.

    The input is NFKC-normalized, stripped of a fixed set of accentuation
    marks, passed through `lower_case` and `zero_digits`, wrapped with
    '<' and '>', and finally handed to `char_ngrams`.

    Args:
        input_words: words to process; forwarded to `normalize_unicode`.
        minn: forwarded to `char_ngrams` as its second argument
            (presumably the minimum n-gram size -- confirm against helper).
        maxn: forwarded to `char_ngrams` as its third argument
            (presumably the maximum n-gram size -- confirm against helper).

    Returns:
        Whatever `char_ngrams` returns when called with `itself='ALONE'`.
    """
    # Accentuation marks that are removed (replaced with an empty string).
    accent_marks = [
        u'\u0060', u' \u0301', u'\u02CA', u'\u02CB', u'\u0300', u'\u0301',
    ]

    words = normalize_unicode(input_words, 'NFKC')
    words = replace_string(words, accent_marks, [''] * len(accent_marks))
    words = zero_digits(lower_case(words))
    words = wrap_with(words, '<', '>')

    return char_ngrams(words, minn, maxn, itself='ALONE')
    def test_skip(self):
        result = replace_string([['<test>', '<unk>']], ['<'], ['>'],
                                skip=['<unk>'])

        result = self.evaluate(result)
        self.assertAllEqual([[b'>test>', b'<unk>']], result)
    def test_2d(self):
        result = replace_string([['<test>']], ['<'], ['>'])

        result = self.evaluate(result)
        self.assertAllEqual([[b'>test>']], result)
    def test_0d(self):
        result = replace_string('<test>', ['<'], ['>'])

        result = self.evaluate(result)
        self.assertAllEqual(b'>test>', result)
    def test_empty_haystack(self):
        result = replace_string('<test>', ['<'], [''])

        result = self.evaluate(result)
        self.assertAllEqual(b'test>', result)
Exemplo n.º 10
0
 def test_empty_needle(self):
     with self.assertRaisesRegexp(tf.errors.InvalidArgumentError,
                                  'Items of "needle" could not be empty'):
         result = replace_string('<test>', [''], ['>'])
         result = self.evaluate(result)
         self.assertAllEqual(b'test', result)
Exemplo n.º 11
0
    def test_empty(self):
        """Check that an empty input string stays empty."""
        replaced = replace_string('', ['<'], ['>'])

        self.assertAllEqual(b'', self.evaluate(replaced))