def test_keras_save_load(self):
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    # remove `return_loss` to make the code work
    if self.__class__.__name__ == "TFCLIPModelTest":
        inputs_dict.pop("return_loss", None)

    tf_main_layer_classes = set(
        module_member
        for model_class in self.all_model_classes
        for module in (import_module(model_class.__module__),)
        for module_member_name in dir(module)
        if module_member_name.endswith("MainLayer")
        # This condition is required, since `modeling_tf_clip.py` has 3 classes whose names end with `MainLayer`.
        and module_member_name[:-len("MainLayer")] == model_class.__name__[:-len("Model")]
        for module_member in (getattr(module, module_member_name),)
        if isinstance(module_member, type)
        and tf.keras.layers.Layer in module_member.__bases__
        and getattr(module_member, "_keras_serializable", False)
    )
    for main_layer_class in tf_main_layer_classes:
        # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
        if "T5" in main_layer_class.__name__:
            # Take the same values as in TFT5ModelTester for this shared layer
            shared = TFSharedEmbeddings(99, 32, name="shared")
            config.use_cache = inputs_dict.pop("use_cache", None)
            main_layer = main_layer_class(config, embed_tokens=shared)
        else:
            main_layer = main_layer_class(config)

        symbolic_inputs = {
            name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype)
            for name, tensor in inputs_dict.items()
        }

        model = tf.keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
        outputs = model(inputs_dict)

        with tempfile.TemporaryDirectory() as tmpdirname:
            filepath = os.path.join(tmpdirname, "keras_model.h5")
            model.save(filepath)
            if "T5" in main_layer_class.__name__:
                model = tf.keras.models.load_model(
                    filepath,
                    custom_objects={
                        main_layer_class.__name__: main_layer_class,
                        "TFSharedEmbeddings": TFSharedEmbeddings,
                    },
                )
            else:
                model = tf.keras.models.load_model(
                    filepath, custom_objects={main_layer_class.__name__: main_layer_class}
                )
            assert isinstance(model, tf.keras.Model)
            after_outputs = model(inputs_dict)
            self.assert_outputs_same(after_outputs, outputs)
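# A minimal, self-contained sketch of the `symbolic_inputs` pattern used in the
# tests above and below: concrete example tensors are turned into matching
# `tf.keras.Input` placeholders (dropping the batch dimension) so a functional
# Keras model can be traced from a layer. The names and shapes here are
# illustrative only, not values from the real model testers.
import tensorflow as tf

inputs_dict = {
    "input_ids": tf.ones((2, 7), dtype=tf.int32),
    "attention_mask": tf.ones((2, 7), dtype=tf.int32),
}
symbolic_inputs = {
    name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype)
    for name, tensor in inputs_dict.items()
}
# Each placeholder has shape (None, 7): an unknown batch size plus the
# per-example shape of the corresponding concrete tensor.
print({name: inp.shape for name, inp in symbolic_inputs.items()})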
def test_keras_save_load(self):
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    tf_main_layer_classes = set(
        module_member
        for model_class in self.all_model_classes
        for module in (import_module(model_class.__module__),)
        for module_member_name in dir(module)
        if module_member_name.endswith("MainLayer")
        for module_member in (getattr(module, module_member_name),)
        if isinstance(module_member, type)
        and tf.keras.layers.Layer in module_member.__bases__
        and getattr(module_member, "_keras_serializable", False)
    )
    for main_layer_class in tf_main_layer_classes:
        # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
        if "T5" in main_layer_class.__name__:
            # Take the same values as in TFT5ModelTester for this shared layer
            shared = TFSharedEmbeddings(99, 32, name="shared")
            main_layer = main_layer_class(config, embed_tokens=shared)
        else:
            main_layer = main_layer_class(config)

        symbolic_inputs = {
            name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype)
            for name, tensor in inputs_dict.items()
        }

        model = tf.keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
        outputs = model(inputs_dict)

        with tempfile.TemporaryDirectory() as tmpdirname:
            filepath = os.path.join(tmpdirname, "keras_model.h5")
            model.save(filepath)
            if "T5" in main_layer_class.__name__:
                model = tf.keras.models.load_model(
                    filepath,
                    custom_objects={
                        main_layer_class.__name__: main_layer_class,
                        "TFSharedEmbeddings": TFSharedEmbeddings,
                    },
                )
            else:
                model = tf.keras.models.load_model(
                    filepath, custom_objects={main_layer_class.__name__: main_layer_class}
                )
            assert isinstance(model, tf.keras.Model)
            after_outputs = model(inputs_dict)
            self.assert_outputs_same(after_outputs, outputs)
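# A minimal, self-contained sketch of the save/load round trip exercised by the
# two tests above. `ToySerializableLayer` is a hypothetical stand-in for a
# transformers `*MainLayer`; only the Keras APIs here are real. Saving to HDF5
# and reloading with `custom_objects` should reproduce the original outputs.
import os
import tempfile
import numpy as np
import tensorflow as tf

class ToySerializableLayer(tf.keras.layers.Layer):
    def __init__(self, hidden_size=32, **kwargs):
        super().__init__(**kwargs)
        self.hidden_size = hidden_size
        self.dense = tf.keras.layers.Dense(hidden_size)

    def call(self, inputs):
        return self.dense(inputs)

    def get_config(self):
        # Required so the layer can be re-instantiated at load time.
        config = super().get_config()
        config.update({"hidden_size": self.hidden_size})
        return config

symbolic_input = tf.keras.Input(shape=(16,), dtype=tf.float32)
model = tf.keras.Model(symbolic_input, ToySerializableLayer(name="toy")(symbolic_input))
x = np.random.rand(2, 16).astype("float32")
outputs = model(x)

with tempfile.TemporaryDirectory() as tmpdirname:
    filepath = os.path.join(tmpdirname, "keras_model.h5")
    model.save(filepath)
    # custom_objects maps the serialized class name back to the class,
    # mirroring the tests above.
    model = tf.keras.models.load_model(
        filepath, custom_objects={"ToySerializableLayer": ToySerializableLayer})

after_outputs = model(x)
np.testing.assert_allclose(outputs.numpy(), after_outputs.numpy(), atol=1e-6)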
def test_train_pipeline_custom_model(self):
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    # head_mask and decoder_head_mask have different shapes than the other input args
    if "head_mask" in inputs_dict:
        del inputs_dict["head_mask"]
    if "decoder_head_mask" in inputs_dict:
        del inputs_dict["decoder_head_mask"]
    if "cross_attn_head_mask" in inputs_dict:
        del inputs_dict["cross_attn_head_mask"]
    tf_main_layer_classes = set(
        module_member
        for model_class in self.all_model_classes
        for module in (import_module(model_class.__module__),)
        for module_member_name in dir(module)
        if module_member_name.endswith("MainLayer")
        for module_member in (getattr(module, module_member_name),)
        if isinstance(module_member, type)
        and tf.keras.layers.Layer in module_member.__bases__
        and getattr(module_member, "_keras_serializable", False)
    )
    for main_layer_class in tf_main_layer_classes:
        # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
        if "T5" in main_layer_class.__name__:
            # Take the same values as in TFT5ModelTester for this shared layer
            shared = TFSharedEmbeddings(
                self.model_tester.vocab_size, self.model_tester.hidden_size, name="shared"
            )
            config.use_cache = False
            main_layer = main_layer_class(config, embed_tokens=shared)
        else:
            main_layer = main_layer_class(config)

        symbolic_inputs = {
            name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype)
            for name, tensor in inputs_dict.items()
        }

        if hasattr(self.model_tester, "num_labels"):
            num_labels = self.model_tester.num_labels
        else:
            num_labels = 2

        X = tf.data.Dataset.from_tensor_slices(
            (
                inputs_dict,
                np.ones((self.model_tester.batch_size, self.model_tester.seq_length, num_labels, 1)),
            )
        ).batch(1)

        hidden_states = main_layer(symbolic_inputs)[0]
        outputs = tf.keras.layers.Dense(num_labels, activation="softmax", name="outputs")(hidden_states)
        model = tf.keras.models.Model(inputs=symbolic_inputs, outputs=[outputs])
        model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["binary_accuracy"])
        model.fit(X, epochs=1)

        with tempfile.TemporaryDirectory() as tmpdirname:
            filepath = os.path.join(tmpdirname, "keras_model.h5")
            model.save(filepath)
            if "T5" in main_layer_class.__name__:
                model = tf.keras.models.load_model(
                    filepath,
                    custom_objects={
                        main_layer_class.__name__: main_layer_class,
                        "TFSharedEmbeddings": TFSharedEmbeddings,
                    },
                )
            else:
                model = tf.keras.models.load_model(
                    filepath, custom_objects={main_layer_class.__name__: main_layer_class}
                )
            assert isinstance(model, tf.keras.Model)
            model(inputs_dict)
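# A minimal, self-contained sketch of the train-pipeline pattern above: a
# softmax head is stacked on a backbone's hidden states, compiled with
# binary_crossentropy, and fit for one epoch on a tf.data.Dataset of
# (inputs_dict, labels) pairs. The Dense backbone and all shapes are
# illustrative stand-ins for a real MainLayer and the model_tester values.
import numpy as np
import tensorflow as tf

batch_size, seq_length, hidden_size, num_labels = 2, 7, 32, 3

inputs_dict = {"inputs_embeds": tf.ones((batch_size, seq_length, hidden_size))}
symbolic_inputs = {
    name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype)
    for name, tensor in inputs_dict.items()
}

# Keras squeezes the trailing size-1 label dimension to match the softmax output.
X = tf.data.Dataset.from_tensor_slices(
    (inputs_dict, np.ones((batch_size, seq_length, num_labels, 1)))
).batch(1)

hidden_states = tf.keras.layers.Dense(hidden_size)(symbolic_inputs["inputs_embeds"])
outputs = tf.keras.layers.Dense(num_labels, activation="softmax", name="outputs")(hidden_states)
model = tf.keras.models.Model(inputs=symbolic_inputs, outputs=[outputs])
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["binary_accuracy"])
model.fit(X, epochs=1)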
class MaskLM(tf.keras.Model):
    """Multimodal MLM top layer."""

    def __init__(self,
                 params: BaseParams,
                 problem_name: str,
                 input_embeddings: tf.keras.layers.Layer = None,
                 share_embedding=True) -> None:
        super(MaskLM, self).__init__(name=problem_name)
        self.params = params
        self.problem_name = problem_name
        if share_embedding is False:
            self.vocab_size = self.params.bert_config.vocab_size
            self.share_embedding = False
        else:
            self.vocab_size = input_embeddings.shape[0]
            embedding_size = input_embeddings.shape[-1]
            share_valid = (self.params.bert_config.hidden_size == embedding_size)
            if not share_valid and self.params.share_embedding:
                logger.warning(
                    'Share embedding is enabled but hidden_size != embedding_size')
            self.share_embedding = self.params.share_embedding and share_valid

        if self.share_embedding:
            self.share_embedding_layer = TFSharedEmbeddings(
                vocab_size=self.vocab_size,
                hidden_size=input_embeddings.shape[1])
            # build with a dummy shape, then tie the weight to the input embeddings
            self.share_embedding_layer.build([1])
            self.share_embedding_layer.weight = input_embeddings
        else:
            self.share_embedding_layer = tf.keras.layers.Dense(self.vocab_size)

    def call(self, inputs):
        mode = get_phase()
        features, hidden_features = inputs

        # masking is done inside the model
        seq_hidden_feature = hidden_features['seq']
        if mode != PREDICT:
            positions = features['masked_lm_positions']

            # gather_indexes flattens the seq hidden states; reshape
            # back to a 3d tensor
            input_tensor = gather_indexes(seq_hidden_feature, positions)
            shape_tensor = tf.shape(positions)
            shape_list = tf.concat(
                [shape_tensor, [seq_hidden_feature.shape.as_list()[-1]]], axis=0)
            input_tensor = tf.reshape(input_tensor, shape=shape_list)
            # set_shape to determine rank
            input_tensor.set_shape(
                [None, None, seq_hidden_feature.shape.as_list()[-1]])
        else:
            input_tensor = seq_hidden_feature

        if self.share_embedding:
            mlm_logits = self.share_embedding_layer(input_tensor, mode='linear')
        else:
            mlm_logits = self.share_embedding_layer(input_tensor)

        if mode != PREDICT:
            mlm_labels = features['masked_lm_ids']
            mlm_labels.set_shape([None, None])
            mlm_labels = pad_to_shape(
                from_tensor=mlm_labels, to_tensor=mlm_logits, axis=1)
            # compute loss
            mlm_loss = empty_tensor_handling_loss(
                mlm_labels, mlm_logits,
                tf.keras.losses.sparse_categorical_crossentropy)
            loss = nan_loss_handling(mlm_loss)
            self.add_loss(loss)

        return tf.nn.softmax(mlm_logits)
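# A minimal sketch of the weight-tying trick MaskLM relies on: a single
# TFSharedEmbeddings matrix maps token ids to vectors in "embedding" mode and
# projects hidden states back to vocabulary logits in "linear" mode. Assumes a
# transformers version that still ships TFSharedEmbeddings; the shapes are
# illustrative.
import tensorflow as tf
from transformers.modeling_tf_utils import TFSharedEmbeddings

vocab_size, hidden_size = 100, 32
shared = TFSharedEmbeddings(vocab_size, hidden_size, name="shared")

ids = tf.constant([[1, 5, 42]])               # (batch, seq)
embedded = shared(ids, mode="embedding")      # (batch, seq, hidden_size)
mlm_logits = shared(embedded, mode="linear")  # (batch, seq, vocab_size)
print(mlm_logits.shape)  # (1, 3, 100)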