def squad_model(bert_config,
                max_seq_length,
                initializer=None,
                hub_module_url=None,
                hub_module_trainable=True):
    """Builds a BERT span labeler for SQuAD together with its core encoder.

    Args:
      bert_config: BertConfig describing the core BERT model.
      max_seq_length: Integer, the maximum input sequence length.
      initializer: Initializer for the final dense layer of the span labeler.
        When None, a TruncatedNormal initializer using
        `bert_config.initializer_range` as stddev is created.
      hub_module_url: TF-Hub path/url of a BERT module. When empty or None
        the encoder is created by `get_transformer_encoder` instead.
      hub_module_trainable: Whether the layers of the hub module are
        fine-tuned.

    Returns:
      A tuple of (1) the Keras span-labeling model and (2) the core BERT
      encoder that was wrapped by it.
    """
    if initializer is None:
        initializer = tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range)

    if hub_module_url:
        # One int32 Keras input per BERT feature, keyed by feature name.
        feature_inputs = {
            feature: tf.keras.layers.Input(
                shape=(max_seq_length,), dtype=tf.int32, name=feature)
            for feature in ('input_word_ids', 'input_mask', 'input_type_ids')
        }
        core_model = hub.KerasLayer(
            hub_module_url, trainable=hub_module_trainable)
        pooled_output, sequence_output = core_model(
            list(feature_inputs.values()))
        # The hub layer yields (pooled, sequence); the wrapped encoder
        # exposes them in the opposite order.
        bert_encoder = tf.keras.Model(
            inputs=feature_inputs,
            outputs=[sequence_output, pooled_output],
            name='core_model')
    else:
        bert_encoder = get_transformer_encoder(bert_config, max_seq_length)

    span_labeler = models.BertSpanLabeler(
        network=bert_encoder, initializer=initializer)
    return span_labeler, bert_encoder
def build_model(self):
    """Creates the span-labeling model for this task.

    The encoder is wrapped from the TF-Hub module when `self._hub_module` is
    set, and otherwise instantiated from `self.task_config.network`.

    Returns:
      A `models.BertSpanLabeler` built on top of the selected encoder.
    """
    if not self._hub_module:
        encoder_network = encoders.instantiate_encoder_from_cfg(
            self.task_config.network)
    else:
        # TODO(lehou): maybe add the hub_module building logic to a util
        # function.
        word_ids, mask, type_ids = (
            tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name=feature)
            for feature in ('input_word_ids', 'input_mask', 'input_type_ids'))
        bert_model = hub.KerasLayer(self._hub_module, trainable=True)
        pooled_output, sequence_output = bert_model(
            [word_ids, mask, type_ids])
        # The hub layer yields (pooled, sequence); the encoder exposes them
        # in the opposite order.
        encoder_network = tf.keras.Model(
            inputs=[word_ids, mask, type_ids],
            outputs=[sequence_output, pooled_output])

    span_initializer = tf.keras.initializers.TruncatedNormal(
        stddev=self.task_config.network.initializer_range)
    return models.BertSpanLabeler(
        network=encoder_network, initializer=span_initializer)
def create_qa_model(bert_config,
                    max_seq_length,
                    initializer=None,
                    hub_module_url=None,
                    hub_module_trainable=True,
                    is_tf2=True):
    """Returns BERT qa model along with core BERT model to import weights.

    The core encoder is always loaded from a TF-Hub module, so
    `hub_module_url` must be provided even though it has a `None` default.

    Args:
      bert_config: BertConfig, the config defines the core Bert model. Only
        its `initializer_range` is read, and only when `initializer` is None.
      max_seq_length: integer, the maximum input sequence length.
      initializer: Initializer for the final dense layer in the span labeler.
        Defaulted to TruncatedNormal initializer.
      hub_module_url: TF-Hub path/url to Bert module. Required.
      hub_module_trainable: True to finetune layers in the hub module.
      is_tf2: boolean, whether the hub module is in TensorFlow 2.x format.

    Returns:
      A tuple of (1) keras model that outputs start logits and end logits and
      (2) the core BERT transformer encoder.

    Raises:
      ValueError: If `hub_module_url` is empty or None.
    """
    # Fail fast with a clear message instead of building the Keras inputs
    # first and only then failing inside the hub loader.
    if not hub_module_url:
        raise ValueError(
            '`hub_module_url` must be specified to create the qa model.')

    if initializer is None:
        initializer = tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range)

    input_word_ids = tf.keras.layers.Input(
        shape=(max_seq_length,), dtype=tf.int32, name='input_word_ids')
    input_mask = tf.keras.layers.Input(
        shape=(max_seq_length,), dtype=tf.int32, name='input_mask')
    input_type_ids = tf.keras.layers.Input(
        shape=(max_seq_length,), dtype=tf.int32, name='input_type_ids')

    if is_tf2:
        # TF2 hub modules take a positional list and return
        # (pooled_output, sequence_output).
        core_model = hub.KerasLayer(
            hub_module_url, trainable=hub_module_trainable)
        pooled_output, sequence_output = core_model(
            [input_word_ids, input_mask, input_type_ids])
    else:
        # Other modules are called through the 'tokens' signature, with
        # dict inputs and dict outputs.
        bert_model = hub_loader.HubKerasLayerV1V2(
            hub_module_url,
            signature='tokens',
            signature_outputs_as_dict=True,
            trainable=hub_module_trainable)
        outputs = bert_model({
            'input_ids': input_word_ids,
            'input_mask': input_mask,
            'segment_ids': input_type_ids,
        })
        pooled_output = outputs['pooled_output']
        sequence_output = outputs['sequence_output']

    bert_encoder = tf.keras.Model(
        inputs=[input_word_ids, input_mask, input_type_ids],
        outputs=[sequence_output, pooled_output],
        name='core_model')
    return models.BertSpanLabeler(
        network=bert_encoder, initializer=initializer), bert_encoder
def setUp(self):
    """Builds the span labeler under test from the EdgeTPU BERT params.

    The encoder is taken from a pretrainer built with the `student_model`
    config and wrapped in a `models.BertSpanLabeler`, which is stored on
    `self.span_labeler`.
    """
    super(ExportTfliteSquadTest, self).setUp()
    student_config = params.EdgeTPUBERTCustomParams().student_model
    pretrainer = model_builder.build_bert_pretrainer(
        student_config, name='pretrainer')
    self.span_labeler = models.BertSpanLabeler(
        network=pretrainer.encoder_network,
        initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01))
def build_model(self):
    """Creates the span-labeling model for this task.

    Returns:
      A `models.BertSpanLabeler` whose encoder comes from the hub module when
      `self._hub_module` is set, and from `self.task_config.network`
      otherwise.
    """
    network_cfg = self.task_config.network
    encoder_network = (
        utils.get_encoder_from_hub(self._hub_module)
        if self._hub_module
        else encoders.instantiate_encoder_from_cfg(network_cfg))
    span_initializer = tf.keras.initializers.TruncatedNormal(
        stddev=self.task_config.network.initializer_range)
    return models.BertSpanLabeler(
        network=encoder_network, initializer=span_initializer)
def build_model(self):
    """Creates the span-labeling model for this task.

    Returns:
      A `models.BertSpanLabeler` whose encoder comes from the hub module when
      `self._hub_module` is set, and from `self.task_config.model.encoder`
      otherwise.
    """
    encoder_network = (
        utils.get_encoder_from_hub(self._hub_module)
        if self._hub_module
        else encoders.instantiate_encoder_from_cfg(
            self.task_config.model.encoder))
    # Currently, only BERT-style question answering fine-tuning is supported.
    span_initializer = tf.keras.initializers.TruncatedNormal(
        stddev=self.task_config.model.encoder.initializer_range)
    return models.BertSpanLabeler(
        network=encoder_network, initializer=span_initializer)
def build_model(self):
    """Creates the span-labeling model for this task.

    Returns:
      A `models.BertSpanLabeler` whose encoder comes from the hub module when
      `task_config.hub_module_url` is set, and is built from
      `task_config.model.encoder` otherwise.

    Raises:
      ValueError: If both `hub_module_url` and `init_checkpoint` are set on
        the task config.
    """
    task_config = self.task_config
    hub_url = task_config.hub_module_url
    if hub_url and task_config.init_checkpoint:
        raise ValueError(
            'At most one of `hub_module_url` and `init_checkpoint` can be '
            'specified.')

    encoder_network = (
        utils.get_encoder_from_hub(hub_url)
        if hub_url
        else encoders.build_encoder(task_config.model.encoder))
    encoder_cfg = task_config.model.encoder.get()
    span_initializer = tf.keras.initializers.TruncatedNormal(
        stddev=encoder_cfg.initializer_range)
    return models.BertSpanLabeler(
        network=encoder_network, initializer=span_initializer)
def build_model(self):
    """Creates the span-labeling model for this task.

    Returns:
      A `models.BertSpanLabeler` whose encoder comes from the loaded hub
      module when `task_config.hub_module_url` is set, and is built from
      `task_config.model.encoder` otherwise.

    Raises:
      ValueError: If both `hub_module_url` and `init_checkpoint` are set on
        the task config.
    """
    task_config = self.task_config
    if task_config.hub_module_url and task_config.init_checkpoint:
        raise ValueError(
            'At most one of `hub_module_url` and `init_checkpoint` can be '
            'specified.')

    hub_module = (
        hub.load(task_config.hub_module_url)
        if task_config.hub_module_url
        else None)
    if not hub_module:
        encoder_network = encoders.build_encoder(task_config.model.encoder)
    else:
        encoder_network = utils.get_encoder_from_hub(hub_module)

    encoder_cfg = task_config.model.encoder.get()
    # Currently, only BERT-style question answering fine-tuning is supported.
    span_initializer = tf.keras.initializers.TruncatedNormal(
        stddev=encoder_cfg.initializer_range)
    return models.BertSpanLabeler(
        network=encoder_network, initializer=span_initializer)
def main(argv: Sequence[str]) -> None:
    """Exports the EdgeTPU BERT span labeler as a TFLite model.

    Builds the span labeler on top of the student encoder, optionally
    restores its weights from `FLAGS.model_checkpoint`, saves the serving
    model as a SavedModel in a temporary directory, converts it with the
    TFLite converter (quantized according to `FLAGS.quantization_method`)
    and writes the result to `model.tflite` under `FLAGS.export_path`.

    Args:
      argv: Command-line arguments; anything beyond the first entry is
        rejected.

    Raises:
      app.UsageError: If more than one command-line argument is passed.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Set up experiment params and load the configs from file/files.
    experiment_params = params.EdgeTPUBERTCustomParams()
    experiment_params = utils.config_override(experiment_params, FLAGS)
    # Change the input mask type to tf.float32 to avoid additional casting op.
    experiment_params.student_model.encoder.mobilebert.input_mask_dtype = (
        'float32')

    # Experiments indicate using -120 as the mask value for Softmax is good
    # enough for both int8 and bfloat. So we set quantization_friendly to True
    # for both quant and float model.
    pretrainer_model = model_builder.build_bert_pretrainer(
        experiment_params.student_model,
        name='pretrainer',
        quantization_friendly=True)

    encoder_network = pretrainer_model.encoder_network
    model = models.BertSpanLabeler(
        network=encoder_network,
        initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01))

    # Load model weights.
    if FLAGS.model_checkpoint is not None:
        checkpoint_dict = {'model': model}
        checkpoint = tf.train.Checkpoint(**checkpoint_dict)
        checkpoint.restore(
            FLAGS.model_checkpoint).assert_existing_objects_matched()

    model_for_serving = build_model_for_serving(model)
    model_for_serving.summary()

    def _representative_dataset():
        """Yields up to 100 single-example input triples for calibration."""
        dataset_params = question_answering_dataloader.QADataConfig()
        dataset_params.input_path = SQUAD_TRAIN_SPLIT
        dataset_params.drop_remainder = False
        dataset_params.global_batch_size = 1
        dataset_params.is_training = True
        dataset = orbit.utils.make_distributed_dataset(
            tf.distribute.get_strategy(), build_inputs, dataset_params)
        for example in dataset.take(100):
            inputs = example[0]
            input_word_ids = inputs['input_word_ids']
            input_mask = inputs['input_mask']
            input_type_ids = inputs['input_type_ids']
            yield [input_word_ids, input_mask, input_type_ids]

    # TODO(b/194449109): Need to save the model to file and then convert
    # tflite with 'tf.lite.TFLiteConverter.from_saved_model()' to get the
    # expected accuracy.
    #
    # The TemporaryDirectory object is kept alive by the `with` block for the
    # whole save + convert sequence. Taking only `.name` from an unreferenced
    # TemporaryDirectory lets it be cleaned up implicitly straight away, and
    # the SavedModel later written to that path would never be removed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Save into a fresh sub-path so the target does not pre-exist.
        saved_model_dir = os.path.join(tmp_dir, 'saved_model')
        model_for_serving.save(saved_model_dir)

        converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
        if FLAGS.quantization_method in ['full-integer', 'hybrid']:
            converter.optimizations = [tf.lite.Optimize.DEFAULT]
        if FLAGS.quantization_method == 'full-integer':
            converter.target_spec.supported_ops = [
                tf.lite.OpsSet.TFLITE_BUILTINS_INT8
            ]
            converter.inference_input_type = tf.int8
            converter.inference_output_type = tf.float32
            converter.representative_dataset = _representative_dataset

        # Convert while the SavedModel directory still exists.
        tflite_quant_model = converter.convert()

    export_model_path = os.path.join(FLAGS.export_path, 'model.tflite')
    with tf.io.gfile.GFile(export_model_path, 'wb') as f:
        f.write(tflite_quant_model)
    logging.info('Successfully save the tflite to %s', FLAGS.export_path)