def _get_bert_config_or_encoder_config(use_bert_config,
                                       hidden_size,
                                       num_hidden_layers,
                                       vocab_size=100):
  """Returns config args for export_tfhub_lib._create_model()."""
  if use_bert_config:
    bert_config = configs.BertConfig(
        vocab_size=vocab_size,
        hidden_size=hidden_size,
        intermediate_size=32,
        max_position_embeddings=128,
        num_attention_heads=2,
        num_hidden_layers=num_hidden_layers)
    encoder_config = None
  else:
    bert_config = None
    encoder_config = encoders.EncoderConfig(
        type="albert",
        albert=encoders.AlbertEncoderConfig(
            vocab_size=vocab_size,
            embedding_width=16,
            hidden_size=hidden_size,
            intermediate_size=32,
            max_position_embeddings=128,
            num_attention_heads=2,
            num_layers=num_hidden_layers,
            dropout_rate=0.1))
  return bert_config, encoder_config
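
# Illustrative sketch (not an original test): how the helper above typically
# feeds export_tfhub_lib.export_model(). The export_model() call mirrors
# _export_bert_tfhub() below; `_example_export` itself and the checkpoint/vocab
# path arguments are hypothetical placeholders, not part of the original file.
def _example_export(export_path, model_checkpoint_path, vocab_file):
  bert_config, encoder_config = _get_bert_config_or_encoder_config(
      use_bert_config=True, hidden_size=16, num_hidden_layers=1)
  export_tfhub_lib.export_model(
      export_path,
      bert_config=bert_config,        # Set in the BERT branch.
      encoder_config=encoder_config,  # None when use_bert_config=True.
      model_checkpoint_path=model_checkpoint_path,
      vocab_file=vocab_file,
      do_lower_case=True,
      with_mlm=False)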
def _export_bert_tfhub(self):
  bert_config = configs.BertConfig(
      vocab_size=30522,
      hidden_size=16,
      intermediate_size=32,
      max_position_embeddings=128,
      num_attention_heads=2,
      num_hidden_layers=4)
  encoder = export_tfhub_lib.get_bert_encoder(bert_config)
  model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint")
  checkpoint = tf.train.Checkpoint(encoder=encoder)
  checkpoint.save(os.path.join(model_checkpoint_dir, "test"))
  model_checkpoint_path = tf.train.latest_checkpoint(model_checkpoint_dir)

  vocab_file = os.path.join(self.get_temp_dir(), "uncased_vocab.txt")
  with tf.io.gfile.GFile(vocab_file, "w") as f:
    f.write("dummy content")

  export_path = os.path.join(self.get_temp_dir(), "hub")
  export_tfhub_lib.export_model(
      export_path,
      bert_config=bert_config,
      encoder_config=None,
      model_checkpoint_path=model_checkpoint_path,
      vocab_file=vocab_file,
      do_lower_case=True,
      with_mlm=False)
  return export_path
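
# Illustrative sketch (not an original test): restoring the SavedModel written
# by _export_bert_tfhub() as a hub.KerasLayer. The dict-style inputs and the
# "pooled_output" key are assumptions about the signature export_tfhub_lib
# produces; `_example_restore_exported_bert` is not part of the original file.
def _example_restore_exported_bert(self):
  export_path = self._export_bert_tfhub()
  hub_layer = hub.KerasLayer(export_path, trainable=True)
  dummy_ids = np.zeros((2, 10), dtype=np.int32)  # (batch_size, seq_length)
  outputs = hub_layer(
      dict(input_word_ids=dummy_ids,
           input_mask=dummy_ids,
           input_type_ids=dummy_ids))
  # hidden_size is 16 in the BertConfig above.
  self.assertEqual(outputs["pooled_output"].shape, (2, 16))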
def setUp(self):
  super(BertModelsTest, self).setUp()
  self._bert_test_config = bert_configs.BertConfig(
      attention_probs_dropout_prob=0.0,
      hidden_act='gelu',
      hidden_dropout_prob=0.0,
      hidden_size=16,
      initializer_range=0.02,
      intermediate_size=32,
      max_position_embeddings=128,
      num_attention_heads=2,
      num_hidden_layers=2,
      type_vocab_size=2,
      vocab_size=30522)
def test_export_model(self):
  # Exports a SavedModel for TF-Hub.
  hidden_size = 16
  bert_config = configs.BertConfig(
      vocab_size=100,
      hidden_size=hidden_size,
      intermediate_size=32,
      max_position_embeddings=128,
      num_attention_heads=2,
      num_hidden_layers=1)
  labse_model, encoder = export_tfhub.create_labse_model(
      None, bert_config, normalize=True)
  model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint")
  checkpoint = tf.train.Checkpoint(encoder=encoder)
  checkpoint.save(os.path.join(model_checkpoint_dir, "test"))
  model_checkpoint_path = tf.train.latest_checkpoint(model_checkpoint_dir)

  vocab_file = os.path.join(self.get_temp_dir(), "uncased_vocab.txt")
  with tf.io.gfile.GFile(vocab_file, "w") as f:
    f.write("dummy content")

  hub_destination = os.path.join(self.get_temp_dir(), "hub")
  export_tfhub.export_labse_model(
      None,  # bert_tfhub_module
      bert_config,
      model_checkpoint_path,
      hub_destination,
      vocab_file,
      do_lower_case=True,
      normalize=True)

  # Restores a hub KerasLayer.
  hub_layer = hub.KerasLayer(hub_destination, trainable=True)

  if hasattr(hub_layer, "resolved_object"):
    # Checks meta attributes.
    self.assertTrue(hub_layer.resolved_object.do_lower_case.numpy())
    with tf.io.gfile.GFile(
        hub_layer.resolved_object.vocab_file.asset_path.numpy()) as f:
      self.assertEqual("dummy content", f.read())

  # Checks the hub KerasLayer.
  for source_weight, hub_weight in zip(labse_model.trainable_weights,
                                       hub_layer.trainable_weights):
    self.assertAllClose(source_weight.numpy(), hub_weight.numpy())

  seq_length = 10
  dummy_ids = np.zeros((2, seq_length), dtype=np.int32)
  hub_outputs = hub_layer([dummy_ids, dummy_ids, dummy_ids])
  source_outputs = labse_model([dummy_ids, dummy_ids, dummy_ids])

  self.assertEqual(hub_outputs["pooled_output"].shape, (2, hidden_size))
  self.assertEqual(hub_outputs["sequence_output"].shape,
                   (2, seq_length, hidden_size))
  # Compares the source model outputs against the restored hub outputs.
  for output_name in source_outputs:
    self.assertAllClose(source_outputs[output_name].numpy(),
                        hub_outputs[output_name].numpy())

  # Tests that training=True makes a difference (activates dropout).
  def _dropout_mean_stddev(training, num_runs=20):
    input_ids = np.array([[14, 12, 42, 95, 99]], np.int32)
    inputs = [input_ids, np.ones_like(input_ids), np.zeros_like(input_ids)]
    outputs = np.concatenate([
        hub_layer(inputs, training=training)["pooled_output"]
        for _ in range(num_runs)
    ])
    return np.mean(np.std(outputs, axis=0))

  self.assertLess(_dropout_mean_stddev(training=False), 1e-6)
  self.assertGreater(_dropout_mean_stddev(training=True), 1e-3)

  # Tests propagation of seq_length in shape inference.
  input_word_ids = tf.keras.layers.Input(shape=(seq_length,), dtype=tf.int32)
  input_mask = tf.keras.layers.Input(shape=(seq_length,), dtype=tf.int32)
  input_type_ids = tf.keras.layers.Input(shape=(seq_length,), dtype=tf.int32)
  outputs = hub_layer([input_word_ids, input_mask, input_type_ids])
  self.assertEqual(outputs["pooled_output"].shape.as_list(),
                   [None, hidden_size])
  self.assertEqual(outputs["sequence_output"].shape.as_list(),
                   [None, seq_length, hidden_size])
def test_forward_pass(self, use_pointing=False, query_transformer=False,
                      is_training=True):
  """Randomly generates and runs different configurations for Felix Tagger."""
  # Setup.
  sequence_length = 7
  vocab_size = 11
  bert_hidden_size = 13
  bert_num_hidden_layers = 1
  bert_num_attention_heads = 1
  bert_intermediate_size = 4
  bert_type_vocab_size = 2
  bert_max_position_embeddings = sequence_length
  bert_encoder = networks.BertEncoder(
      vocab_size=vocab_size,
      hidden_size=bert_hidden_size,
      num_layers=bert_num_hidden_layers,
      num_attention_heads=bert_num_attention_heads,
      intermediate_size=bert_intermediate_size,
      sequence_length=sequence_length,
      max_sequence_length=bert_max_position_embeddings,
      type_vocab_size=bert_type_vocab_size)
  bert_config = configs.BertConfig(
      vocab_size,
      hidden_size=bert_hidden_size,
      num_hidden_layers=bert_num_hidden_layers,
      num_attention_heads=bert_num_attention_heads,
      intermediate_size=bert_intermediate_size,
      type_vocab_size=bert_type_vocab_size,
      max_position_embeddings=bert_max_position_embeddings)
  batch_size = 17
  edit_tags_size = 19
  bert_config.num_classes = edit_tags_size
  bert_config.query_size = 23
  bert_config.query_transformer = query_transformer
  tagger = felix_tagger.FelixTagger(
      bert_encoder,
      bert_config=bert_config,
      seq_length=sequence_length,
      use_pointing=use_pointing,
      is_training=is_training)

  # Creates random inputs. Seeding ensures reproducibility.
  np.random.seed(42)
  input_word_ids = np.random.randint(
      vocab_size - 1, size=(batch_size, sequence_length))
  input_mask = np.random.randint(1, size=(batch_size, sequence_length))
  input_type_ids = np.ones((batch_size, sequence_length))
  edit_tags = np.random.randint(
      edit_tags_size - 2, size=(batch_size, sequence_length))

  # Runs the model.
  if is_training:
    output = tagger([input_word_ids, input_type_ids, input_mask, edit_tags])
  else:
    output = tagger([input_word_ids, input_type_ids, input_mask])

  # Checks output shapes.
  if use_pointing:
    tag_logits, pointing_logits = output
    self.assertEqual(pointing_logits.shape,
                     (batch_size, sequence_length, sequence_length))
  else:
    tag_logits = output[0]
  self.assertEqual(tag_logits.shape,
                   (batch_size, sequence_length, edit_tags_size))
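
# Note (assumption, not from the original file): the defaulted flags above are
# usually swept with absl's parameterized test helpers rather than called with
# their defaults, e.g. by decorating the test:
#
#   from absl.testing import parameterized
#
#   @parameterized.parameters(
#       (True, True, True),
#       (True, False, True),
#       (False, True, True),
#       (False, False, False),
#   )
#   def test_forward_pass(self, use_pointing, query_transformer, is_training):
#     ...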