def _create_bert_tokenizer(self):
  """Builds a Bert tokenizer process unit and its associated file names.

  Returns:
    A tuple of (ProcessUnitT configured with BertTokenizerOptions, list of
    the associated vocab file names referenced by the tokenizer).
  """
  vocab_name = "bert_vocab"
  vocab_file = _metadata_fb.AssociatedFileT()
  vocab_file.name = vocab_name
  vocab_file.type = _metadata_fb.AssociatedFileType.VOCABULARY
  # Assemble the options first, then wrap them in the process unit.
  options = _metadata_fb.BertTokenizerOptionsT()
  options.vocabFile = [vocab_file]
  unit = _metadata_fb.ProcessUnitT()
  unit.optionsType = _metadata_fb.ProcessUnitOptions.BertTokenizerOptions
  unit.options = options
  return unit, [vocab_name]
def create_metadata(self) -> _metadata_fb.ProcessUnitT:
  """Creates the score calibration metadata based on the information.

  Returns:
    A Flatbuffers Python object of the score calibration metadata.
  """
  # Populate the calibration options from the instance configuration,
  # then attach them to a fresh process unit.
  calibration_options = _metadata_fb.ScoreCalibrationOptionsT()
  calibration_options.scoreTransformation = self._score_transformation_type
  calibration_options.defaultScore = self._default_score
  process_unit = _metadata_fb.ProcessUnitT()
  process_unit.optionsType = (
      _metadata_fb.ProcessUnitOptions.ScoreCalibrationOptions)
  process_unit.options = calibration_options
  return process_unit
def create_metadata(self) -> _metadata_fb.ProcessUnitT:
  """Creates the Bert tokenizer metadata based on the information.

  Returns:
    A Flatbuffers Python object of the Bert tokenizer metadata.
  """
  # The vocab file is recorded as an associated file of VOCABULARY type.
  vocab_file = _metadata_fb.AssociatedFileT()
  vocab_file.name = self._vocab_file_path
  vocab_file.description = _VOCAB_FILE_DESCRIPTION
  vocab_file.type = _metadata_fb.AssociatedFileType.VOCABULARY
  options = _metadata_fb.BertTokenizerOptionsT()
  options.vocabFile = [vocab_file]
  unit = _metadata_fb.ProcessUnitT()
  unit.optionsType = _metadata_fb.ProcessUnitOptions.BertTokenizerOptions
  unit.options = options
  return unit
def _create_sentence_piece_tokenizer(self):
  """Builds a SentencePiece tokenizer process unit and its file names.

  Returns:
    A tuple of (ProcessUnitT configured with SentencePieceTokenizerOptions,
    list of the associated file names: the model file then the vocab file).
  """
  model_name = "sp_model"
  vocab_name = "sp_vocab"
  model_file = _metadata_fb.AssociatedFileT()
  model_file.name = model_name
  vocab_file = _metadata_fb.AssociatedFileT()
  vocab_file.name = vocab_name
  vocab_file.type = _metadata_fb.AssociatedFileType.VOCABULARY
  # Assemble the options first, then wrap them in the process unit.
  options = _metadata_fb.SentencePieceTokenizerOptionsT()
  options.sentencePieceModel = [model_file]
  options.vocabFile = [vocab_file]
  unit = _metadata_fb.ProcessUnitT()
  unit.optionsType = (
      _metadata_fb.ProcessUnitOptions.SentencePieceTokenizerOptions)
  unit.options = options
  return unit, [model_name, vocab_name]
def create_metadata(self) -> _metadata_fb.TensorMetadataT:
  """Creates the input image metadata based on the information.

  Returns:
    A Flatbuffers Python object of the input image metadata.
  """
  metadata = super().create_metadata()
  metadata.content.contentProperties.colorSpace = self.color_space_type
  # Attach normalization parameters only when both mean and std are set
  # (non-empty); otherwise the tensor metadata carries no process units.
  if self.norm_mean and self.norm_std:
    norm_options = _metadata_fb.NormalizationOptionsT()
    norm_options.mean = self.norm_mean
    norm_options.std = self.norm_std
    norm_unit = _metadata_fb.ProcessUnitT()
    norm_unit.optionsType = (
        _metadata_fb.ProcessUnitOptions.NormalizationOptions)
    norm_unit.options = norm_options
    metadata.processUnits = [norm_unit]
  return metadata
def create_metadata(self) -> _metadata_fb.ProcessUnitT:
  """Creates the sentence piece tokenizer metadata based on the information.

  Returns:
    A Flatbuffers Python object of the sentence piece tokenizer metadata.
  """
  options = _metadata_fb.SentencePieceTokenizerOptionsT()
  # The SentencePiece model file is always recorded.
  model_file = _metadata_fb.AssociatedFileT()
  model_file.name = self._sentence_piece_model_path
  model_file.description = self._SP_MODEL_DESCRIPTION
  options.sentencePieceModel = [model_file]
  # The vocab file is optional and only attached when a path was provided.
  if self._vocab_file_path:
    vocab_file = _metadata_fb.AssociatedFileT()
    vocab_file.name = self._vocab_file_path
    vocab_file.description = self._SP_VOCAB_FILE_DESCRIPTION
    vocab_file.type = _metadata_fb.AssociatedFileType.VOCABULARY
    options.vocabFile = [vocab_file]
  unit = _metadata_fb.ProcessUnitT()
  unit.optionsType = (
      _metadata_fb.ProcessUnitOptions.SentencePieceTokenizerOptions)
  unit.options = unit_options = options
  return unit