def test_unicode(self):
    """Check replacement on non-ASCII input (Cyrillic text with a combining accent).

    The expected value corresponds to the three needle/replacement pairs
    being applied one after another to the source string.
    """
    needles = [u'́', u'е', u'ост']
    replacements = ['', u'о', u'остовый']

    actual = replace_string(u'т́ест', needles, replacements)
    wanted = tf.convert_to_tensor(u'тостовый', dtype=tf.string)

    # Evaluate both tensors together so they are compared as concrete values.
    wanted, actual = self.evaluate([wanted, actual])
    self.assertAllEqual(wanted, actual)
def test_inference_shape(self):
    """The static shape reported on the result matches the 2x3 nested-list input."""
    rows = [['1', '2', '3'], ['4', '5', '6']]

    replaced = replace_string(rows, ['<'], ['>'])
    static_shape = replaced.shape.as_list()

    # Only the static shape attribute is inspected here; nothing is evaluated.
    self.assertAllEqual([2, 3], static_shape)
def test_ragged(self):
    """Ragged input is accepted; the result is padded with '' before comparison."""
    ragged_words = tf.ragged.constant([['<test', 'test>'], ['test']])
    dense_expected = tf.constant([['>test', 'test>'], ['test', '']])

    replaced = replace_string(ragged_words, ['<'], ['>'])
    # Convert to a dense tensor so it can be compared with the dense constant above.
    dense_actual = replaced.to_tensor(default_value='')

    dense_expected, dense_actual = self.evaluate([dense_expected, dense_actual])
    self.assertAllEqual(dense_expected, dense_actual)
def test_actual_shape(self):
    """The evaluated (dynamic) shape of the result matches the 2x3 input."""
    rows = [['1', '2', '3'], ['4', '5', '6']]

    replaced = replace_string(rows, ['<'], ['>'])
    # tf.shape yields the shape as a tensor, which must be evaluated to get values.
    dynamic_shape = self.evaluate(tf.shape(replaced))

    self.assertAllEqual([2, 3], dynamic_shape.tolist())
def ngram_features(input_words, minn, maxn):
    """Build character n-gram features for the given words.

    The input goes through a fixed preprocessing chain before n-grams are
    extracted: ``normalize_unicode`` with the 'NFKC' form, removal of a fixed
    list of accent characters via ``replace_string``, ``lower_case``,
    ``zero_digits`` and ``wrap_with`` using '<' and '>'.

    Args:
        input_words: Words to process; passed as-is to ``normalize_unicode``.
        minn: Forwarded to ``char_ngrams`` (presumably the minimum n-gram
            length -- confirm against ``char_ngrams``).
        maxn: Forwarded to ``char_ngrams`` (presumably the maximum n-gram
            length -- confirm against ``char_ngrams``).

    Returns:
        Whatever ``char_ngrams`` returns for the preprocessed words when
        called with ``itself='ALONE'``.
    """
    # Accentuation marks that are replaced with the empty string.
    # NOTE(review): the second entry intentionally keeps its leading space;
    # presumably this is the form NFKC produces for a spacing acute accent
    # (U+00B4) -- confirm before changing.
    accents = [
        u'\u0060', u' \u0301', u'\u02CA', u'\u02CB', u'\u0300', u'\u0301',
    ]

    normalized = normalize_unicode(input_words, 'NFKC')
    unaccented = replace_string(normalized, accents, [''] * len(accents))
    lowered = lower_case(unaccented)
    zeroed = zero_digits(lowered)
    wrapped = wrap_with(zeroed, '<', '>')

    return char_ngrams(wrapped, minn, maxn, itself='ALONE')
def test_skip(self): result = replace_string([['<test>', '<unk>']], ['<'], ['>'], skip=['<unk>']) result = self.evaluate(result) self.assertAllEqual([[b'>test>', b'<unk>']], result)
def test_2d(self): result = replace_string([['<test>']], ['<'], ['>']) result = self.evaluate(result) self.assertAllEqual([[b'>test>']], result)
def test_0d(self): result = replace_string('<test>', ['<'], ['>']) result = self.evaluate(result) self.assertAllEqual(b'>test>', result)
def test_empty_haystack(self): result = replace_string('<test>', ['<'], ['']) result = self.evaluate(result) self.assertAllEqual(b'test>', result)
def test_empty_needle(self): with self.assertRaisesRegexp(tf.errors.InvalidArgumentError, 'Items of "needle" could not be empty'): result = replace_string('<test>', [''], ['>']) result = self.evaluate(result) self.assertAllEqual(b'test', result)
def test_empty(self):
    """An empty input string comes back as an empty byte string."""
    word = ''

    actual = self.evaluate(replace_string(word, ['<'], ['>']))

    self.assertAllEqual(b'', actual)