def siamese_model(bert_config, num_labels, siamese_type='classify',
                  pooling_type='CLS'):
  encoder = bert_models.get_transformer_encoder(bert_config)
  bert_siamese = BertSiamese(
      encoder=encoder,
      pooling_type=pooling_type,
      dropout_rate=bert_config.hidden_dropout_prob,
      norm=siamese_type == 'ams')
  # Uncomment the following lines to get a baseline model to debug your
  # network.
  # bert_siamese = encoder = LSTMSiamese(
  #     bert_config.vocab_size, bert_config.hidden_size,
  #     norm=siamese_type == 'ams')
  if siamese_type == 'classify':
    bert_siamese = SiameseClassifierModel(
        bert_siamese,
        num_labels=num_labels,
        dropout_rate=bert_config.hidden_dropout_prob)
  elif siamese_type == 'triplet':
    bert_siamese = SiameseTripletModel(bert_siamese)
  elif siamese_type == 'contrastive':
    bert_siamese = SiameseContrastiveModel(bert_siamese)
  elif siamese_type == 'ams':
    bert_siamese = SiameseAMSModel(bert_siamese)
  else:
    raise ValueError(f'Siamese type {siamese_type} not supported!')
  return bert_siamese, encoder
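# A minimal usage sketch (not from the source): selecting 'ams' (presumably
# additive margin softmax) builds the AMS head and enables the `norm` flag
# above. `bert_configs` and the config path are assumptions.
def _example_siamese_usage():
  bert_config = bert_configs.BertConfig.from_json_file('bert_config.json')
  siamese, encoder = siamese_model(
      bert_config, num_labels=2, siamese_type='ams')
  return siamese, encoder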
def create_albert_model(
    albert_config: bert_modeling.AlbertConfig
) -> Tuple[tf.keras.Model, tf.keras.Model]:
  """Creates an ALBERT keras core model from ALBERT configuration.

  Args:
    albert_config: An `AlbertConfig` to create the core model.

  Returns:
    A keras model and the underlying transformer encoder.
  """
  # Adds input layers just as placeholders.
  input_word_ids = tf.keras.layers.Input(
      shape=(None,), dtype=tf.int32, name="input_word_ids")
  input_mask = tf.keras.layers.Input(
      shape=(None,), dtype=tf.int32, name="input_mask")
  input_type_ids = tf.keras.layers.Input(
      shape=(None,), dtype=tf.int32, name="input_type_ids")
  transformer_encoder = bert_models.get_transformer_encoder(
      albert_config, sequence_length=None, float_dtype=tf.float32)
  sequence_output, pooled_output = transformer_encoder(
      [input_word_ids, input_mask, input_type_ids])
  # To keep consistent with legacy hub modules, the outputs are
  # "pooled_output" and "sequence_output".
  return tf.keras.Model(
      inputs=[input_word_ids, input_mask, input_type_ids],
      outputs=[pooled_output, sequence_output]), transformer_encoder
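# A usage sketch (assumption, not from the source): the config is read from a
# hypothetical `albert_config.json`, and the returned core model maps
# (word_ids, mask, type_ids) to [pooled_output, sequence_output].
def _example_albert_usage():
  albert_config = bert_modeling.AlbertConfig.from_json_file(
      'albert_config.json')
  core_model, transformer_encoder = create_albert_model(albert_config)
  return core_model, transformer_encoder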
def unified_model(bert_config, num_labels, max_seq_length, initializer=None):
  """BERT unified model in functional API style.

  Construct a Keras model for predicting `num_labels` outputs from an input
  with maximum sequence length `max_seq_length`, as well as the core BERT
  sub-model used to build it.

  Args:
    bert_config: BertConfig or AlbertConfig, the config defines the core BERT
      or ALBERT model.
    num_labels: integer, the number of classes.
    max_seq_length: integer, the maximum input sequence length.
    initializer: Initializer for the final dense layer. Defaults to a
      TruncatedNormal initializer.

  Returns:
    Combined prediction model (words, mask, type) -> (one-hot labels)
    BERT sub-model (words, mask, type) -> (bert_outputs)
  """
  if initializer is None:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)
  bert_encoder = bert_models.get_transformer_encoder(bert_config,
                                                     max_seq_length)
  return bert_unified_labeler.BertUnifiedLabeler(
      bert_encoder,
      num_classes=num_labels,
      initializer=initializer,
      dropout_rate=bert_config.hidden_dropout_prob), bert_encoder
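# A usage sketch (not from the source), assuming a hypothetical config file
# and checkpoint prefix; the bare encoder is returned separately so pretrained
# weights can be restored into it, mirroring the pattern in rewrite_weights
# below.
def _example_unified_usage(init_checkpoint='path/to/bert/ckpt'):
  bert_config = bert_configs.BertConfig.from_json_file('bert_config.json')
  labeler, encoder = unified_model(bert_config, num_labels=9,
                                   max_seq_length=128)
  tf.train.Checkpoint(model=encoder).restore(
      init_checkpoint).assert_existing_objects_matched()
  return labeler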
def __init__(self, bert_config, name_to_features=None, name="serving_model"):
  super(BertServing, self).__init__(name=name)
  self.encoder = bert_models.get_transformer_encoder(
      bert_config, sequence_length=None)
  self.name_to_features = name_to_features
def rewrite_weights(orig_ckpt, orig_config, output_ckpt,
                    pooler_initialization):
  """Remove vestigial pooler weights."""
  # read original checkpoint
  print(f"building model from config: [{orig_config}] ...")
  bert_config = bert_configs.BertConfig.from_json_file(orig_config)
  m = bert_models.get_transformer_encoder(
      bert_config=bert_config, sequence_length=1, output_range=1)
  print("...successfully built model.")

  print(f"\nloading model from prefix: [{orig_ckpt}] ...")
  checkpoint = tf.train.Checkpoint(model=m)
  checkpoint.restore(orig_ckpt).assert_existing_objects_matched()
  print("...successfully loaded model.")

  orig_pooler_weights, orig_pooler_bias = m.pooler_layer.weights
  print("\nupdating weights...")

  # update pooler bias
  print(" ...pooler bias with zeros.")
  new_pooler_bias = tf.constant(
      0., dtype=orig_pooler_bias.dtype, shape=orig_pooler_bias.shape)

  # update pooler weights
  pooler_shape = orig_pooler_weights.shape
  pooler_dtype = orig_pooler_weights.dtype
  if pooler_initialization == "identity":
    print(" ...pooler weights with identity.")
    new_pooler_weights = tf.eye(pooler_shape[0], dtype=pooler_dtype)
  elif pooler_initialization == "truncated_normal":
    stddev = bert_config.initializer_range
    print(" ...pooler weights with truncated_normal "
          "(stddev={}).".format(stddev))
    new_pooler_weights = tf.random.truncated_normal(
        shape=pooler_shape, mean=0., stddev=stddev, dtype=pooler_dtype)
  else:
    raise ValueError(pooler_initialization)
  m.pooler_layer.set_weights([new_pooler_weights, new_pooler_bias])
  print("...weights updated!")

  print("\nsaving checkpoint...")
  new_checkpoint = tf.train.Checkpoint(model=m)
  # access save_counter so it is created before saving the checkpoint.
  new_checkpoint.save_counter  # pylint: disable=pointless-statement
  new_checkpoint.write(output_ckpt)
  print("... saved!")
  print(f"\nsurgery successful! new model at: [{output_ckpt}]")
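# A sketch of how rewrite_weights might be invoked (paths are hypothetical,
# not from the source): reinitializes the pooler of a pretrained checkpoint
# with an identity matrix and writes the result to a new prefix.
def _example_rewrite_weights():
  rewrite_weights(
      orig_ckpt='pretrained/bert_model.ckpt',
      orig_config='pretrained/bert_config.json',
      output_ckpt='surgered/bert_model.ckpt',
      pooler_initialization='identity')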
def ner_model(bert_config, num_labels, use_crf=False):
  encoder_network = bert_models.get_transformer_encoder(bert_config)
  if use_crf:
    model = BertNERCRFModel(encoder_network, num_labels)
  else:
    model = models.BertTokenClassifier(
        network=encoder_network,
        num_classes=num_labels,
        initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
        dropout_rate=bert_config.hidden_dropout_prob,
        output='logits')
  return model, encoder_network
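# A usage sketch (not from the source): `use_crf=True` swaps the plain token
# classifier head for the CRF head. The config path and the 9 tags (e.g.
# CoNLL-style BIO labels) are assumptions.
def _example_ner_usage():
  bert_config = bert_configs.BertConfig.from_json_file('bert_config.json')
  crf_model, encoder = ner_model(bert_config, num_labels=9, use_crf=True)
  return crf_model, encoder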
def __init__(self, batch_size, config, bert_config, training, verbose=False):
  super(Model, self).__init__()
  self.config = config
  self.bert_encoder = bert_models.get_transformer_encoder(
      bert_config, sequence_length=self.config["max_num_wordpieces"])
  self.application_score_layer = ApplicationScoreLayer(config)
  self.training = training
  self.batch_size = batch_size
def build_model(bert_dir):
  max_seq_len = 384
  bert_config = BertConfig.from_json_file(
      os.path.join(bert_dir, 'bert_config.json'))
  bert_encoder = get_transformer_encoder(bert_config, max_seq_len)
  input_ids = tf.keras.layers.Input(
      shape=(max_seq_len,), dtype=tf.int32, name='input_ids')
  input_mask = tf.keras.layers.Input(
      shape=(max_seq_len,), dtype=tf.int32, name='input_mask')
  segment_ids = tf.keras.layers.Input(
      shape=(max_seq_len,), dtype=tf.int32, name='segment_ids')
  bert_inputs = [input_ids, input_mask, segment_ids]
  bert_sequence_output, bert_pooled_output = bert_encoder(bert_inputs)
  out = Dense(1, activation='sigmoid', name='out')(bert_pooled_output)
  return Model(inputs=bert_inputs, outputs=[out])
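# A sketch of compiling the model above for binary classification (not from
# the source); the sigmoid output pairs with a binary cross-entropy loss.
# `bert_dir` is a hypothetical directory holding `bert_config.json`.
def _example_build_and_compile(bert_dir='uncased_L-12_H-768_A-12'):
  model = build_model(bert_dir)
  model.compile(
      optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
      loss='binary_crossentropy',
      metrics=['accuracy'])
  return model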
def __init__(self, max_seq_length: int, bert_config: configs.BertConfig,
             trainable: bool):
  """BERT class constructor.

  Args:
    max_seq_length: the maximum input sequence length.
    bert_config: Configuration for a BERT model.
    trainable: whether the model is trainable.
  """
  super(_BERT, self).__init__()
  self.bert_model = bert_models.get_transformer_encoder(
      bert_config, max_seq_length)
  self._trainable = trainable
def squad_model(bert_config,
                max_seq_length,
                initializer=None,
                hub_module_url=None,
                hub_module_trainable=True):
  """Returns BERT Squad model along with core BERT model to import weights.

  Args:
    bert_config: BertConfig, the config defines the core Bert model.
    max_seq_length: integer, the maximum input sequence length.
    initializer: Initializer for the final dense layer in the span labeler.
      Defaults to a TruncatedNormal initializer.
    hub_module_url: TF-Hub path/url to Bert module.
    hub_module_trainable: True to finetune layers in the hub module.

  Returns:
    A tuple of (1) keras model that outputs start logits and end logits and
    (2) the core BERT transformer encoder.
  """
  if initializer is None:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)

  if not hub_module_url:
    bert_encoder = bert_models.get_transformer_encoder(bert_config,
                                                       max_seq_length)
    return models.BertSpanLabeler(
        network=bert_encoder, initializer=initializer), bert_encoder

  input_word_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_word_ids')
  input_mask = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_mask')
  input_type_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_type_ids')
  core_model = hub.KerasLayer(hub_module_url, trainable=hub_module_trainable)
  pooled_output, sequence_output = core_model(
      [input_word_ids, input_mask, input_type_ids])
  bert_encoder = tf.keras.Model(
      inputs={
          'input_word_ids': input_word_ids,
          'input_mask': input_mask,
          'input_type_ids': input_type_ids,
      },
      outputs=[sequence_output, pooled_output],
      name='core_model')
  return models.BertSpanLabeler(
      network=bert_encoder, initializer=initializer), bert_encoder
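# A usage sketch (not from the source): without `hub_module_url` the span
# labeler is built directly from the config; it outputs start and end logits
# over the 384-token sequence. The config path is hypothetical.
def _example_squad_usage():
  bert_config = bert_configs.BertConfig.from_json_file('bert_config.json')
  span_labeler, encoder = squad_model(bert_config, max_seq_length=384)
  return span_labeler, encoder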
def bert_model(
    num_classes: int,
    max_seq_length: int,
    bert_config: configs.BertConfig) -> Tuple[tf.keras.Model, tf.keras.Model]:
  """BERT classifier model in functional API style.

  Construct a Keras model for predicting `num_classes` outputs from an input
  with maximum sequence length `max_seq_length`.

  Args:
    num_classes: (int) the number of classes.
    max_seq_length: (int) the maximum input sequence length.
    bert_config: (BertConfig) Configuration for a BERT model.

  Returns:
    Combined prediction model (words, mask, type) -> (one-hot labels)
    BERT sub-model (words, mask, type) -> (bert_outputs)
  """
  # Defines initializer and encoder.
  final_layer_initializer = tf.keras.initializers.TruncatedNormal(
      stddev=bert_config.initializer_range)
  bert_encoder = bert_models.get_transformer_encoder(
      bert_config, max_seq_length, output_range=1)

  # Build model.
  inputs = bert_encoder.inputs
  _, cls_output = bert_encoder(inputs)
  cls_output = tf.keras.layers.Dropout(
      rate=bert_config.hidden_dropout_prob)(cls_output)

  # Build output.
  outputs = tf.keras.layers.Dense(
      num_classes,
      activation=None,
      kernel_initializer=final_layer_initializer,
      name='predictions/transform/logits')(cls_output)

  # Construct model.
  bert_classifier = tf.keras.Model(inputs=inputs, outputs=outputs)
  return bert_classifier, bert_encoder
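# A minimal smoke test (assumption, not from the source): the classifier is
# called on a dummy batch. `output_range=1` above restricts the encoder's
# final layer to the first (CLS) position, so only the pooled output feeds
# the logits.
def _example_bert_model_smoke_test():
  bert_config = configs.BertConfig.from_json_file('bert_config.json')
  classifier, encoder = bert_model(
      num_classes=3, max_seq_length=128, bert_config=bert_config)
  dummy = tf.zeros((2, 128), dtype=tf.int32)
  logits = classifier([dummy, dummy, dummy])  # shape: (2, 3)
  return logits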
def pretrain_model(electra_config,
                   seq_length,
                   max_predictions_per_seq,
                   initializer=None):
  """Returns model to be used for pre-training.

  Args:
    electra_config: Configuration that defines the core ELECTRA model.
    seq_length: Maximum sequence length of the training data.
    max_predictions_per_seq: Maximum number of tokens in sequence to mask out
      and use for pretraining.
    initializer: Initializer for weights in ElectraPretrainer.

  Returns:
    Pretraining model as well as the core discriminator BERT submodel from
    which to save weights after pretraining.
  """
  input_word_ids = tf.keras.layers.Input(
      shape=(seq_length,), name='input_word_ids', dtype=tf.int32)
  input_mask = tf.keras.layers.Input(
      shape=(seq_length,), name='input_mask', dtype=tf.int32)
  input_type_ids = tf.keras.layers.Input(
      shape=(seq_length,), name='input_type_ids', dtype=tf.int32)
  masked_lm_positions = tf.keras.layers.Input(
      shape=(max_predictions_per_seq,),
      name='masked_lm_positions',
      dtype=tf.int32)
  masked_lm_ids = tf.keras.layers.Input(
      shape=(max_predictions_per_seq,), name='masked_lm_ids', dtype=tf.int32)
  masked_lm_weights = tf.keras.layers.Input(
      shape=(max_predictions_per_seq,),
      name='masked_lm_weights',
      dtype=tf.int32)
  gen_encoder = bert_models.get_transformer_encoder(
      electraconfigs.ElectraConfig.get_generator_bert(electra_config),
      seq_length)
  discrim_encoder = bert_models.get_transformer_encoder(
      electraconfigs.ElectraConfig.get_discriminator_bert(electra_config),
      seq_length)
  if initializer is None:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=electra_config.initializer_range)
  pretrainer_model = models.ElectraPretrainer(
      network=gen_encoder,
      discriminator=discrim_encoder,
      num_classes=2,
      num_token_predictions=max_predictions_per_seq,
      initializer=initializer,
      output='predictions')
  lm_output, discrim_output, discrim_labels = pretrainer_model(
      [input_word_ids, input_mask, input_type_ids, masked_lm_positions])
  pretrain_loss_layer = ElectraPretrainLossAndMetricLayer(
      vocab_size=electra_config.vocab_size,
      discrim_rate=electra_config.discrim_rate)
  output_loss = pretrain_loss_layer(lm_output, masked_lm_ids,
                                    masked_lm_weights, discrim_output,
                                    discrim_labels, input_mask)
  keras_model = tf.keras.Model(
      inputs={
          'input_word_ids': input_word_ids,
          'input_mask': input_mask,
          'input_type_ids': input_type_ids,
          'masked_lm_positions': masked_lm_positions,
          'masked_lm_ids': masked_lm_ids,
          'masked_lm_weights': masked_lm_weights,
      },
      outputs=output_loss)
  return keras_model, discrim_encoder
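# A sketch of the input dict the pretraining model expects (keys must match
# the `tf.keras.layers.Input` names above); the batch size and shapes are
# hypothetical, not from the source.
def _example_pretrain_inputs(batch=8, seq_length=128, max_preds=20):
  return {
      'input_word_ids': tf.zeros((batch, seq_length), tf.int32),
      'input_mask': tf.ones((batch, seq_length), tf.int32),
      'input_type_ids': tf.zeros((batch, seq_length), tf.int32),
      'masked_lm_positions': tf.zeros((batch, max_preds), tf.int32),
      'masked_lm_ids': tf.zeros((batch, max_preds), tf.int32),
      'masked_lm_weights': tf.ones((batch, max_preds), tf.int32),
  }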
def classifier_model(bert_config,
                     num_labels,
                     max_seq_length,
                     final_layer_initializer=None,
                     hub_module_url=None,
                     hub_module_trainable=True):
  """BERT classifier model in functional API style.

  Construct a Keras model for predicting `num_labels` outputs from an input
  with maximum sequence length `max_seq_length`.

  Args:
    bert_config: BertConfig or AlbertConfig, the config defines the core BERT
      or ALBERT model.
    num_labels: integer, the number of classes.
    max_seq_length: integer, the maximum input sequence length.
    final_layer_initializer: Initializer for the final dense layer. Defaults
      to a TruncatedNormal initializer.
    hub_module_url: TF-Hub path/url to Bert module.
    hub_module_trainable: True to finetune layers in the hub module.

  Returns:
    Combined prediction model (words, mask, type) -> (one-hot labels)
    BERT sub-model (words, mask, type) -> (bert_outputs)
  """
  if final_layer_initializer is not None:
    initializer = final_layer_initializer
  else:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)

  if not hub_module_url:
    bert_encoder = bert_models.get_transformer_encoder(bert_config,
                                                       max_seq_length)
    return models.BertClassifier(
        bert_encoder,
        num_classes=num_labels,
        dropout_rate=bert_config.hidden_dropout_prob,
        initializer=initializer), bert_encoder

  input_word_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_word_ids')
  input_mask = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_mask')
  input_type_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_type_ids')
  bert_model = hub.KerasLayer(hub_module_url, trainable=hub_module_trainable)
  pooled_output, _ = bert_model([input_word_ids, input_mask, input_type_ids])
  output = tf.keras.layers.Dropout(rate=bert_config.hidden_dropout_prob)(
      pooled_output)
  output = tf.keras.layers.Dense(
      num_labels, kernel_initializer=initializer, name='output')(output)
  return tf.keras.Model(
      inputs={
          'input_word_ids': input_word_ids,
          'input_mask': input_mask,
          'input_type_ids': input_type_ids
      },
      outputs=output), bert_model
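# A usage sketch of the hub branch (not from the source): the URL below is an
# example TF2 BERT module; any module returning (pooled_output,
# sequence_output) should work here. Config path and label count are
# assumptions.
def _example_classifier_from_hub():
  bert_config = bert_configs.BertConfig.from_json_file('bert_config.json')
  classifier, core = classifier_model(
      bert_config,
      num_labels=2,
      max_seq_length=128,
      hub_module_url='https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1')
  return classifier, core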