def test_ragged(self):
        """Check `lower_case` on a ragged input.

        The ragged result is densified with '' as the padding value so that it
        can be compared against a regular string tensor.
        """
        ragged_words = tf.ragged.constant([['X', 'YY'], ['ZZZ ZZZ']])
        dense_expected = tf.constant([['x', 'yy'], ['zzz zzz', '']])

        lowered = lower_case(ragged_words)
        dense_actual = lowered.to_tensor(default_value='')

        # Evaluate both tensors together before comparing them.
        dense_expected, dense_actual = self.evaluate(
            [dense_expected, dense_actual])
        self.assertAllEqual(dense_expected, dense_actual)
    def test_inference_shape(self):
        """Check the static shape reported for a 2x3 nested-list input."""
        digit_rows = [['1', '2', '3'], ['4', '5', '6']]

        static_shape = lower_case(digit_rows).shape.as_list()

        self.assertAllEqual([2, 3], static_shape)
    def test_actual_shape(self):
        """Check the evaluated `tf.shape` of the output for a 2x3 input."""
        digit_rows = [['1', '2', '3'], ['4', '5', '6']]

        shape_op = tf.shape(lower_case(digit_rows))
        shape_value = self.evaluate(shape_op)

        self.assertAllEqual([2, 3], shape_value.tolist())
示例#4
0
def ngram_features(input_words, minn, maxn):
    """Build character n-gram features for the given words.

    The words are passed, in order, through: NFKC unicode normalization,
    removal of a fixed set of accent marks, `lower_case`, `zero_digits`,
    wrapping with '<' and '>', and finally `char_ngrams`.

    Args:
        input_words: words to featurize, forwarded to `normalize_unicode`.
        minn: forwarded to `char_ngrams` as its second positional argument.
        maxn: forwarded to `char_ngrams` as its third positional argument.

    Returns:
        The result of `char_ngrams(..., itself='ALONE')` on the processed
        words.
    """
    # accentuation
    accent_marks = [
        u'\u0060', u' \u0301', u'\u02CA', u'\u02CB', u'\u0300', u'\u0301',
    ]
    # One empty replacement per accent mark.
    replacements = [''] * len(accent_marks)

    normalized = normalize_unicode(input_words, 'NFKC')
    without_accents = replace_string(normalized, accent_marks, replacements)
    lowered = lower_case(without_accents)
    zeroed = zero_digits(lowered)
    wrapped = wrap_with(zeroed, '<', '>')

    return char_ngrams(wrapped, minn, maxn, itself='ALONE')
示例#5
0
def case_features(input_words):
    """Compute letter-case indicator features for the given words.

    The words are NFKC-normalized and compared element-wise with their
    lower-, upper- and title-cased variants.

    Args:
        input_words: words to inspect, forwarded to `normalize_unicode`.

    Returns:
        A 5-tuple of int32 tensors, in this order:
        no_case  - lower- and upper-cased variants are equal;
        is_lower - has case and equals its lower-cased variant;
        is_upper - has case and equals its upper-cased variant;
        is_title - has case and equals its title-cased variant;
        is_mixed - none of the four flags above is set.
    """
    normalized = normalize_unicode(input_words, 'NFKC')
    lowered = lower_case(normalized)
    uppered = upper_case(normalized)
    titled = title_case(normalized)

    # A word "has case" when lower- and upper-casing give different results.
    has_case = tf.not_equal(lowered, uppered)
    no_case = tf.logical_not(has_case)

    def _cased_and_same_as(variant):
        # True where the word has case and matches the given cased variant.
        return tf.logical_and(has_case, tf.equal(normalized, variant))

    is_lower = _cased_and_same_as(lowered)
    is_upper = _cased_and_same_as(uppered)
    is_title = _cased_and_same_as(titled)

    # Mixed case is whatever falls outside every other category.
    any_other_category = tf.logical_or(
        tf.logical_or(no_case, is_lower),
        tf.logical_or(is_upper, is_title))
    is_mixed = tf.logical_not(any_other_category)

    flags = (no_case, is_lower, is_upper, is_title, is_mixed)
    return tuple(tf.cast(flag, tf.int32) for flag in flags)
    def test_skip(self):
        """Check that a value listed in `skip` is left unchanged."""
        lowered = lower_case([['X', '-Y-', 'z']], skip=['-Y-'])
        actual = self.evaluate(lowered)

        # '-Y-' is in the skip list, so it is expected to keep its capital Y.
        self.assertAllEqual([[b'x', b'-Y-', b'z']], actual)
    def test_unicode(self):
        """Check lower-casing of a mixed-case Cyrillic string."""
        reference = tf.convert_to_tensor(u'тест', dtype=tf.string)
        lowered = lower_case(u'ТеСт')

        # Evaluate both tensors together before comparing them.
        reference, lowered = self.evaluate([reference, lowered])
        self.assertAllEqual(reference, lowered)
    def test_2d(self):
        """Check `lower_case` on a nested (1x1) list input."""
        lowered = lower_case([['X']])
        actual = self.evaluate(lowered)

        self.assertAllEqual([[b'x']], actual)
    def test_0d(self):
        """Check `lower_case` on a single scalar string."""
        lowered = lower_case('X')
        actual = self.evaluate(lowered)

        self.assertAllEqual(b'x', actual)
    def test_empty(self):
        """Check that an empty string yields an empty string."""
        lowered = lower_case('')
        actual = self.evaluate(lowered)

        self.assertAllEqual(b'', actual)