def test_create_metadata_should_succeed(self):
    """Serialized RegexTokenizerMd metadata should match the golden JSON file."""
    md = metadata_info.RegexTokenizerMd(self._DELIM_REGEX_PATTERN,
                                        self._VOCAB_FILE)
    # Wrap the tokenizer metadata in a dummy model so it can be serialized.
    actual_json = _metadata.convert_to_json(
        _create_dummy_model_metadata_with_process_uint(md.create_metadata()))
    golden_json = test_utils.load_file(self._EXPECTED_TENSOR_JSON, "r")
    self.assertEqual(actual_json, golden_json)
def test_get_tokenizer_associated_files_with_regex_tokenizer(self):
    """get_tokenizer_associated_files should surface the regex vocab file."""
    # Build a regex tokenizer backed by a single vocabulary file.
    pattern = r"[^\w\']+"
    vocab = "vocab.txt"
    md = metadata_info.RegexTokenizerMd(pattern, vocab)
    files = writer_utils.get_tokenizer_associated_files(
        md.create_metadata().options)
    self.assertEqual(files, [vocab])
def test_create_for_inference_should_succeed(self):
    """create_for_inference with a regex tokenizer should emit the golden JSON."""
    regex_md = metadata_info.RegexTokenizerMd(_DELIM_REGEX_PATTERN, _VOCAB_FILE)
    writer = nl_classifier.MetadataWriter.create_for_inference(
        test_utils.load_file(_MODEL), regex_md, [_LABEL_FILE])
    # Round-trip the populated model buffer back through the displayer.
    displayer = _metadata.MetadataDisplayer.with_model_buffer(writer.populate())
    self.assertEqual(displayer.get_metadata_json(),
                     test_utils.load_file(_JSON_FOR_INFERENCE_REGEX, "r"))
def test_create_metadata_should_succeed(self):
    """InputTextTensorMd with a regex tokenizer should match the golden JSON."""
    tokenizer = metadata_info.RegexTokenizerMd(self._DELIM_REGEX_PATTERN,
                                               self._VOCAB_FILE)
    tensor_md = metadata_info.InputTextTensorMd(self._NAME, self._DESCRIPTION,
                                                tokenizer)
    # Embed the tensor metadata in a dummy model before serializing.
    actual_json = _metadata.convert_to_json(
        _create_dummy_model_metadata_with_tensor(tensor_md.create_metadata()))
    golden_json = test_utils.load_file(self._EXPECTED_TENSOR_JSON, "r")
    self.assertEqual(actual_json, golden_json)