def test_encode_decode(self):
  """Text features round-trip ASCII and unicode strings as UTF-8 bytes."""
  nonunicode_text = 'hello world'
  unicode_text = u'你好,世界'
  expectations = [
      # Non-unicode
      test_utils.FeatureExpectation(
          name='text',
          feature=features.Text(),
          value=nonunicode_text,
          expected=tf.compat.as_bytes(nonunicode_text)),
      # Unicode
      test_utils.FeatureExpectation(
          name='text_unicode',
          feature=features.Text(),
          value=unicode_text,
          expected=tf.compat.as_bytes(unicode_text)),
  ]
  specs = features.SpecDict(
      {exp.name: exp.feature for exp in expectations})
  # Dict comprehension instead of dict([(k, v) ...]) — same form as the
  # SpecDict construction above.
  decoded_sample = test_utils.features_encode_decode(
      specs, {exp.name: exp.value for exp in expectations})
  for exp in expectations:
    self.assertAllEqual(decoded_sample[exp.name], exp.expected)
def test_save_load_metadata(self):
  """Encoder metadata saved to disk restores the same encoding when loaded."""
  text_f = features.Text(encoder=text_encoder.ByteTextEncoder(
      additional_tokens=['HI']))
  text = u'HI 你好'
  ids = text_f.str2ints(text)
  # The additional token 'HI' is expected to encode to the first
  # non-reserved id.
  self.assertEqual(1, ids[0])
  with testing.tmp_dir(self.get_temp_dir()) as data_dir:
    feature_name = 'dummy'
    text_f.save_metadata(data_dir, feature_name)
    new_f = features.Text()
    new_f.load_metadata(data_dir, feature_name)
    # Bug fix: assert against the restored feature (new_f), not the
    # original text_f — the original always matched ids by construction,
    # so load_metadata was never actually exercised by the assertion.
    self.assertEqual(ids, new_f.str2ints(text))
def expectations(self):
  """Return feature expectations for plain and byte-encoded Text features."""
  ascii_text = 'hello world'
  utf8_text = u'你好'
  # UTF-8 bytes of utf8_text, each offset by one (presumably id 0 is
  # reserved by ByteTextEncoder — confirm against the encoder).
  shifted_byte_ids = [b + 1 for b in (228, 189, 160, 229, 165, 189)]

  plain_tests = [
      # Non-unicode
      test_utils.FeatureExpectationItem(
          value=ascii_text,
          expected=tf.compat.as_bytes(ascii_text),
      ),
      # Unicode
      test_utils.FeatureExpectationItem(
          value=utf8_text,
          expected=tf.compat.as_bytes(utf8_text),
      ),
      # Empty string
      test_utils.FeatureExpectationItem(
          value='',
          expected=tf.compat.as_bytes(''),
      ),
  ]
  encoded_tests = [
      test_utils.FeatureExpectationItem(
          value=utf8_text,
          expected=shifted_byte_ids,
      ),
      # Empty string
      test_utils.FeatureExpectationItem(
          value='',
          expected=[],
      ),
  ]

  return [
      test_utils.FeatureExpectation(
          name='text',
          feature=features.Text(),
          shape=(),
          dtype=tf.string,
          tests=plain_tests,
      ),
      # Unicode integer-encoded by byte
      test_utils.FeatureExpectation(
          name='text_unicode_encoded',
          feature=features.Text(encoder=text_encoder.ByteTextEncoder()),
          shape=(None,),
          dtype=tf.int64,
          tests=encoded_tests,
      ),
  ]
def test_text(self):
  """A plain Text feature passes strings through as UTF-8 bytes."""
  # Non-unicode, unicode, and empty-string inputs all share the same
  # expectation: the bytes form of the input.
  cases = [
      testing.FeatureExpectationItem(
          value=sample,
          expected=tf.compat.as_bytes(sample),
      )
      for sample in ('hello world', u'你好', '')
  ]
  self.assertFeature(
      feature=features.Text(),
      shape=(),
      dtype=tf.string,
      tests=cases,
  )
def _info(self) -> dataset_info.DatasetInfo:
  """Dataset metadata: one Text feature per configured language."""
  per_language_text = {
      language: features_lib.Text() for language in self._languages
  }
  return dataset_info.DatasetInfo(
      builder=self,
      description='Generic text translation dataset.',
      features=features_lib.FeaturesDict(per_language_text),
  )
def _info(self) -> dataset_info.DatasetInfo:
  """Dataset metadata for a generic image-classification dataset."""
  feature_spec = features_lib.FeaturesDict({
      'image': features_lib.Image(),
      'label': features_lib.ClassLabel(),
      'image/filename': features_lib.Text(),
  })
  return dataset_info.DatasetInfo(
      builder=self,
      description='Generic image classification dataset.',
      features=feature_spec,
      supervised_keys=('image', 'label'),
  )
def test_text_encoded(self):
  """Byte-encoded Text maps each UTF-8 byte of the input to byte + 1."""
  sample = u'你好'
  # UTF-8 bytes of sample, each offset by one (presumably id 0 is
  # reserved by ByteTextEncoder — confirm against the encoder).
  expected_ids = [byte + 1 for byte in (228, 189, 160, 229, 165, 189)]
  self.assertFeature(
      feature=features.Text(encoder=text_encoder.ByteTextEncoder()),
      shape=(None,),
      dtype=tf.int64,
      tests=[
          testing.FeatureExpectationItem(
              value=sample,
              expected=expected_ids,
          ),
          # Empty string encodes to an empty id list.
          testing.FeatureExpectationItem(
              value='',
              expected=[],
          ),
      ],
  )
def test_text_conversion(self):
  """ints2str inverts str2ints for a unicode string."""
  feature = features.Text(encoder=text_encoder.ByteTextEncoder())
  original = u'你好'
  round_tripped = feature.ints2str(feature.str2ints(original))
  self.assertEqual(original, round_tripped)