def test_ragged(self):
    """Check that lower_case handles a ragged batch of strings."""
    ragged_words = tf.ragged.constant([['X', 'YY'], ['ZZZ ZZZ']])
    wanted = tf.constant([['x', 'yy'], ['zzz zzz', '']])

    lowered = lower_case(ragged_words)
    # Densify so the result can be compared with a regular tensor; the
    # shorter second row is filled with empty strings.
    actual = lowered.to_tensor(default_value='')

    wanted, actual = self.evaluate([wanted, actual])
    self.assertAllEqual(wanted, actual)
def test_inference_shape(self):
    """Check the static (inferred) shape of the result for a 2x3 input."""
    rows = [['1', '2', '3'], ['4', '5', '6']]
    static_shape = lower_case(rows).shape.as_list()
    self.assertAllEqual([2, 3], static_shape)
def test_actual_shape(self):
    """Check the evaluated (runtime) shape of the result for a 2x3 input."""
    rows = [['1', '2', '3'], ['4', '5', '6']]
    lowered = lower_case(rows)
    # tf.shape yields the shape as a tensor, so it has to be evaluated
    # before it can be compared with a plain Python list.
    runtime_shape = self.evaluate(tf.shape(lowered))
    self.assertAllEqual([2, 3], runtime_shape.tolist())
def ngram_features(input_words, minn, maxn):
    """Build character n-gram features from a batch of words.

    The words go through a fixed preprocessing pipeline before n-grams are
    extracted: NFKC normalization, removal of a fixed set of accent marks,
    lower-casing, ``zero_digits`` and wrapping in ``<`` / ``>``.

    Args:
        input_words: Words to featurize; passed straight to
            ``normalize_unicode``.
        minn: Forwarded to ``char_ngrams`` (presumably the minimum n-gram
            length -- confirm against that helper).
        maxn: Forwarded to ``char_ngrams`` (presumably the maximum n-gram
            length -- confirm against that helper).

    Returns:
        Whatever ``char_ngrams`` produces for the preprocessed words with
        ``itself='ALONE'``.
    """
    # Accent marks that are stripped from every word.
    accent_marks = [
        u'\u0060', u' \u0301', u'\u02CA', u'\u02CB', u'\u0300', u'\u0301',
    ]
    # One empty replacement per accent mark.
    blanks = [''] * len(accent_marks)

    normalized = normalize_unicode(input_words, 'NFKC')
    unaccented = replace_string(normalized, accent_marks, blanks)
    lowered = lower_case(unaccented)
    zeroed = zero_digits(lowered)
    wrapped = wrap_with(zeroed, '<', '>')

    return char_ngrams(wrapped, minn, maxn, itself='ALONE')
def case_features(input_words):
    """Compute letter-case indicator features for a batch of words.

    Each word is NFKC-normalized and compared with its lower-, upper- and
    title-cased variants.

    Args:
        input_words: Words to inspect; passed straight to
            ``normalize_unicode``.

    Returns:
        A 5-tuple of int32 tensors, in this order:
        ``no_case`` (lower and upper variants are equal), ``is_lower``,
        ``is_upper``, ``is_title`` (word has case and equals the respective
        variant) and ``is_mixed`` (none of the previous four holds).
    """
    words = normalize_unicode(input_words, 'NFKC')
    lowered = lower_case(words)
    uppered = upper_case(words)
    titled = title_case(words)

    # A word "has case" when lower-casing and upper-casing disagree.
    has_case = tf.not_equal(lowered, uppered)
    no_case = tf.logical_not(has_case)

    def _cased_and_matches(variant):
        # True where the word has case and is identical to `variant`.
        return tf.logical_and(has_case, tf.equal(words, variant))

    is_lower = _cased_and_matches(lowered)
    is_upper = _cased_and_matches(uppered)
    is_title = _cased_and_matches(titled)

    # Mixed case is whatever is left once the other categories are excluded.
    any_simple_case = tf.logical_or(
        tf.logical_or(no_case, is_lower),
        tf.logical_or(is_upper, is_title),
    )
    is_mixed = tf.logical_not(any_simple_case)

    indicators = (no_case, is_lower, is_upper, is_title, is_mixed)
    return tuple(tf.cast(flag, tf.int32) for flag in indicators)
def test_skip(self):
    """Check that values listed in `skip` are left untouched."""
    lowered = lower_case([['X', '-Y-', 'z']], skip=['-Y-'])
    actual = self.evaluate(lowered)
    # '-Y-' is in the skip list, so it must keep its upper-case letter.
    self.assertAllEqual([[b'x', b'-Y-', b'z']], actual)
def test_unicode(self):
    """Check that non-ASCII (Cyrillic) text is lower-cased."""
    wanted = tf.convert_to_tensor(u'тест', dtype=tf.string)
    lowered = lower_case(u'ТеСт')
    wanted, actual = self.evaluate([wanted, lowered])
    self.assertAllEqual(wanted, actual)
def test_2d(self):
    """Check lower-casing of a nested (1x1) list input."""
    actual = self.evaluate(lower_case([['X']]))
    self.assertAllEqual([[b'x']], actual)
def test_0d(self):
    """Check lower-casing of a single scalar string."""
    actual = self.evaluate(lower_case('X'))
    self.assertAllEqual(b'x', actual)
def test_empty(self):
    """Check that an empty string passes through as an empty string."""
    actual = self.evaluate(lower_case(''))
    self.assertAllEqual(b'', actual)