def _create_bert_tokenizer(self):
    """Builds a Bert tokenizer process unit backed by a vocabulary file.

    Returns:
      A tuple `(tokenizer, file_names)`: `tokenizer` is a
      `_metadata_fb.ProcessUnitT` whose options are `BertTokenizerOptionsT`
      pointing at a single VOCABULARY associated file, and `file_names` is a
      one-element list holding that associated file's name.
    """
    vocab_file_name = "bert_vocab"

    # Describe the vocabulary as an associated file of type VOCABULARY.
    vocab_file = _metadata_fb.AssociatedFileT()
    vocab_file.name = vocab_file_name
    vocab_file.type = _metadata_fb.AssociatedFileType.VOCABULARY

    # Assemble the tokenizer options first, then attach them to the unit.
    bert_options = _metadata_fb.BertTokenizerOptionsT()
    bert_options.vocabFile = [vocab_file]

    process_unit = _metadata_fb.ProcessUnitT()
    process_unit.optionsType = (
        _metadata_fb.ProcessUnitOptions.BertTokenizerOptions
    )
    process_unit.options = bert_options

    return process_unit, [vocab_file_name]
def create_metadata(self) -> _metadata_fb.ProcessUnitT:
    """Creates the Bert tokenizer metadata from the stored vocab file path.

    The vocabulary is recorded as a VOCABULARY associated file whose name is
    taken verbatim from `self._vocab_file_path` and whose description is
    `_VOCAB_FILE_DESCRIPTION`.

    Returns:
      A Flatbuffers Python object (`ProcessUnitT`) holding the Bert tokenizer
      options with that single vocabulary file attached.
    """
    # Associated-file entry describing the vocabulary.
    vocab_file = _metadata_fb.AssociatedFileT()
    vocab_file.name = self._vocab_file_path
    vocab_file.description = _VOCAB_FILE_DESCRIPTION
    vocab_file.type = _metadata_fb.AssociatedFileType.VOCABULARY

    # Tokenizer options referencing the vocabulary entry above.
    bert_options = _metadata_fb.BertTokenizerOptionsT()
    bert_options.vocabFile = [vocab_file]

    process_unit = _metadata_fb.ProcessUnitT()
    process_unit.optionsType = (
        _metadata_fb.ProcessUnitOptions.BertTokenizerOptions
    )
    process_unit.options = bert_options
    return process_unit