def _create_metadata_file(self):
    """Serializes a model-metadata flatbuffer into a temp file.

    The metadata describes a model with two input tensors and one output
    tensor, one model-level associated file and one output-tensor associated
    file. The names of both files are recorded in
    ``self.expected_recorded_files`` for later verification.

    Returns:
      Path to the temporary file holding the serialized metadata buffer.
    """
    file_one = _metadata_fb.AssociatedFileT()
    file_one.name = b"file1"
    file_two = _metadata_fb.AssociatedFileT()
    file_two.name = b"file2"
    self.expected_recorded_files = [
        six.ensure_str(file_one.name),
        six.ensure_str(file_two.name)
    ]

    input_meta = _metadata_fb.TensorMetadataT()
    output_meta = _metadata_fb.TensorMetadataT()
    output_meta.associatedFiles = [file_two]

    # Model with two inputs and a single output.
    subgraph_meta = _metadata_fb.SubGraphMetadataT()
    subgraph_meta.inputTensorMetadata = [input_meta, input_meta]
    subgraph_meta.outputTensorMetadata = [output_meta]

    model_meta = _metadata_fb.ModelMetadataT()
    model_meta.name = "Mobilenet_quantized"
    model_meta.associatedFiles = [file_one]
    model_meta.subgraphMetadata = [subgraph_meta]

    builder = flatbuffers.Builder(0)
    builder.Finish(
        model_meta.Pack(builder),
        _metadata.MetadataPopulator.METADATA_FILE_IDENTIFIER)

    metadata_file = self.create_tempfile().full_path
    with open(metadata_file, "wb") as f:
        f.write(builder.Output())
    return metadata_file
def _create_sentence_piece_tokenizer(self):
    """Builds SentencePiece tokenizer process-unit metadata for tests.

    Returns:
      A tuple of (tokenizer ProcessUnitT, list of associated file names).
    """
    model_file_name = "sp_model"
    vocab_file_name = "sp_vocab"

    model_file = _metadata_fb.AssociatedFileT()
    model_file.name = model_file_name

    vocab_file = _metadata_fb.AssociatedFileT()
    vocab_file.name = vocab_file_name
    vocab_file.type = _metadata_fb.AssociatedFileType.VOCABULARY

    tokenizer = _metadata_fb.ProcessUnitT()
    tokenizer.optionsType = (
        _metadata_fb.ProcessUnitOptions.SentencePieceTokenizerOptions)
    tokenizer.options = _metadata_fb.SentencePieceTokenizerOptionsT()
    tokenizer.options.sentencePieceModel = [model_file]
    tokenizer.options.vocabFile = [vocab_file]
    return tokenizer, [model_file_name, vocab_file_name]
def testPopulatedFullPathAssociatedFileShouldSucceed(self):
    """Verifies that a full-path associated file is recorded by basename."""
    # Name the associated file with its full path.
    full_path_file = _metadata_fb.AssociatedFileT()
    full_path_file.name = self._file1

    # Attach the file at the subgraph level of the model metadata.
    subgraph = _metadata_fb.SubGraphMetadataT()
    subgraph.associatedFiles = [full_path_file]

    # Input/output tensor metadata must match self._model_file:
    # two inputs and one output.
    placeholder_tensor = _metadata_fb.TensorMetadataT()
    subgraph.inputTensorMetadata = [placeholder_tensor, placeholder_tensor]
    subgraph.outputTensorMetadata = [placeholder_tensor]
    md_buffer = self._create_model_meta_with_subgraph_meta(subgraph)

    # Write the metadata and the associated file into the model.
    populator = _metadata.MetadataPopulator.with_model_file(self._model_file)
    populator.load_metadata_buffer(md_buffer)
    populator.load_associated_files([self._file1])
    populator.populate()

    # Only the basename should appear in the recorded file list — the
    # directory portion must be stripped.
    recorded_files = populator.get_recorded_associated_file_list()
    self.assertEqual(
        set(recorded_files), set([os.path.basename(self._file1)]))
def _create_bert_tokenizer(self):
    """Builds Bert tokenizer process-unit metadata for tests.

    Returns:
      A tuple of (tokenizer ProcessUnitT, list of associated file names).
    """
    vocab_name = "bert_vocab"

    vocab_file = _metadata_fb.AssociatedFileT()
    vocab_file.name = vocab_name
    vocab_file.type = _metadata_fb.AssociatedFileType.VOCABULARY

    tokenizer = _metadata_fb.ProcessUnitT()
    tokenizer.optionsType = _metadata_fb.ProcessUnitOptions.BertTokenizerOptions
    tokenizer.options = _metadata_fb.BertTokenizerOptionsT()
    tokenizer.options.vocabFile = [vocab_file]
    return tokenizer, [vocab_name]
def create_metadata(self) -> _metadata_fb.AssociatedFileT:
    """Creates the associated file metadata.

    The recorded name is the basename of ``self.file_path``; description,
    type and locale are copied from the instance as-is.

    Returns:
      A Flatbuffers Python object of the associated file metadata.
    """
    metadata = _metadata_fb.AssociatedFileT()
    metadata.name = os.path.basename(self.file_path)
    metadata.description = self.description
    metadata.type = self.file_type
    metadata.locale = self.locale
    return metadata
def create_metadata(self) -> _metadata_fb.ProcessUnitT:
    """Creates the sentence piece tokenizer metadata based on the information.

    Returns:
      A Flatbuffers Python object of the sentence piece tokenizer metadata.
    """
    # The SentencePiece model file is always present.
    model_file = _metadata_fb.AssociatedFileT()
    model_file.name = self._sentence_piece_model_path
    model_file.description = self._SP_MODEL_DESCRIPTION

    unit = _metadata_fb.ProcessUnitT()
    unit.optionsType = (
        _metadata_fb.ProcessUnitOptions.SentencePieceTokenizerOptions)
    unit.options = _metadata_fb.SentencePieceTokenizerOptionsT()
    unit.options.sentencePieceModel = [model_file]

    # The vocabulary file is optional; attach it only when configured.
    if self._vocab_file_path:
        vocab_file = _metadata_fb.AssociatedFileT()
        vocab_file.name = self._vocab_file_path
        vocab_file.description = self._SP_VOCAB_FILE_DESCRIPTION
        vocab_file.type = _metadata_fb.AssociatedFileType.VOCABULARY
        unit.options.vocabFile = [vocab_file]
    return unit
def create_metadata(self) -> _metadata_fb.ProcessUnitT:
    """Creates the Bert tokenizer metadata based on the information.

    Returns:
      A Flatbuffers Python object of the Bert tokenizer metadata.
    """
    vocab_file = _metadata_fb.AssociatedFileT()
    vocab_file.name = self._vocab_file_path
    vocab_file.description = _VOCAB_FILE_DESCRIPTION
    vocab_file.type = _metadata_fb.AssociatedFileType.VOCABULARY

    unit = _metadata_fb.ProcessUnitT()
    unit.optionsType = _metadata_fb.ProcessUnitOptions.BertTokenizerOptions
    unit.options = _metadata_fb.BertTokenizerOptionsT()
    unit.options.vocabFile = [vocab_file]
    return unit