def test_source_data_files(self):
    """Check that the 8k and 32k problems list the same source data files.

    For the EVAL split and then the TRAIN split, the file lists returned
    by ``source_data_files`` must be equal across the two problems and
    must contain at least one entry.
    """
    small_vocab_problem = translate_ende.TranslateEndeWmt8k()
    large_vocab_problem = translate_ende.TranslateEndeWmt32k()

    # EVAL is checked before TRAIN, each with the same pair of assertions.
    splits = (problem.DatasetSplit.EVAL, problem.DatasetSplit.TRAIN)
    for split in splits:
        small_vocab_files = small_vocab_problem.source_data_files(split)
        large_vocab_files = large_vocab_problem.source_data_files(split)

        self.assertListEqual(small_vocab_files, large_vocab_files)
        # Guard against the equality check passing on two empty lists.
        self.assertGreater(len(small_vocab_files), 0)
def test_additional_datasets(self):
    """Check that neither problem declares additional training datasets.

    ``additional_training_datasets`` must be an empty list for both the
    8k and the 32k problem.
    """
    # Both problems are built before any assertion runs.
    problems_under_test = (
        translate_ende.TranslateEndeWmt8k(),
        translate_ende.TranslateEndeWmt32k(),
    )
    for wmt_problem in problems_under_test:
        self.assertListEqual(wmt_problem.additional_training_datasets, [])
def test_vocab_size(self):
    """Check the approximate vocabulary size reported by each problem.

    The 8k problem must report 8192 and the 32k problem must report 32768
    through ``approx_vocab_size``.
    """
    # Each entry pairs a problem instance with its expected vocab size.
    expectations = [
        (translate_ende.TranslateEndeWmt8k(), 8192),
        (translate_ende.TranslateEndeWmt32k(), 32768),
    ]
    for wmt_problem, expected_size in expectations:
        self.assertEqual(wmt_problem.approx_vocab_size, expected_size)