# Imports assumed for this excerpt; the module paths follow the TensorFlow
# Model Garden layout and may differ in other checkouts. The test methods
# below are written as if they live on a parameterized.TestCase /
# tf.test.TestCase subclass.
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from official.nlp.modeling import models
from official.nlp.modeling.layers import mobile_bert_layers
from official.nlp.modeling.networks import mobile_bert_encoder


def test_mobilebert_encoder_invocation_with_attention_score(self):
  vocab_size = 100
  hidden_size = 32
  sequence_length = 16
  num_blocks = 3
  test_network = mobile_bert_encoder.MobileBERTEncoder(
      word_vocab_size=vocab_size,
      hidden_size=hidden_size,
      num_blocks=num_blocks)

  word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  outputs = test_network([word_ids, mask, type_ids])
  model = tf.keras.Model([word_ids, mask, type_ids], outputs)

  input_seq = generate_fake_input(
      batch_size=1, seq_len=sequence_length, vocab_size=vocab_size)
  input_mask = generate_fake_input(
      batch_size=1, seq_len=sequence_length, vocab_size=2)
  token_type = generate_fake_input(
      batch_size=1, seq_len=sequence_length, vocab_size=2)
  outputs = model.predict([input_seq, input_mask, token_type])
  # One attention-score tensor per transformer block.
  self.assertLen(outputs['attention_scores'], num_blocks)

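# The next method takes `input_mask_dtype` as a test parameter, which implies
# a parameterized-test decorator that is missing from this excerpt. A minimal
# sketch using absl's `parameterized.named_parameters`; the two dtype cases
# are illustrative assumptions, not taken from the original.
@parameterized.named_parameters(
    ('int32_mask', tf.int32),
    ('float32_mask', tf.float32))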
def test_mobilebert_encoder_invocation(self, input_mask_dtype):
  vocab_size = 100
  hidden_size = 32
  sequence_length = 16
  num_blocks = 3
  test_network = mobile_bert_encoder.MobileBERTEncoder(
      word_vocab_size=vocab_size,
      hidden_size=hidden_size,
      num_blocks=num_blocks,
      input_mask_dtype=input_mask_dtype)

  word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  mask = tf.keras.Input(shape=(sequence_length,), dtype=input_mask_dtype)
  type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  outputs = test_network([word_ids, mask, type_ids])
  model = tf.keras.Model([word_ids, mask, type_ids], outputs)

  input_seq = generate_fake_input(
      batch_size=1, seq_len=sequence_length, vocab_size=vocab_size)
  input_mask = generate_fake_input(
      batch_size=1, seq_len=sequence_length, vocab_size=2)
  token_type = generate_fake_input(
      batch_size=1, seq_len=sequence_length, vocab_size=2)
  outputs = model.predict([input_seq, input_mask, token_type])

  sequence_output_shape = [1, sequence_length, hidden_size]
  self.assertAllEqual(outputs['sequence_output'].shape, sequence_output_shape)
  pooled_output_shape = [1, hidden_size]
  self.assertAllEqual(outputs['pooled_output'].shape, pooled_output_shape)

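# The next method is parameterized over the intermediate activation, the
# key/query shared bottleneck, the normalization type, and whether the pooler
# (classifier activation) is used. A minimal sketch of the implied decorator;
# the specific combinations listed are illustrative assumptions.
@parameterized.named_parameters(
    ('default_setting', 'relu', True, 'no_norm', False),
    ('gelu_activation', 'gelu', True, 'no_norm', False),
    ('kq_not_shared', 'relu', False, 'no_norm', False),
    ('layer_norm', 'relu', True, 'layer_norm', False),
    ('use_pooler', 'relu', True, 'no_norm', True))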
def test_mobilebert_encoder(self, act_fn, kq_shared_bottleneck,
                            normalization_type, use_pooler):
  hidden_size = 32
  sequence_length = 16
  num_blocks = 3
  test_network = mobile_bert_encoder.MobileBERTEncoder(
      word_vocab_size=100,
      hidden_size=hidden_size,
      num_blocks=num_blocks,
      intermediate_act_fn=act_fn,
      key_query_shared_bottleneck=kq_shared_bottleneck,
      normalization_type=normalization_type,
      classifier_activation=use_pooler)

  word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  outputs = test_network([word_ids, mask, type_ids])
  layer_output = outputs['sequence_output']
  pooler_output = outputs['pooled_output']

  self.assertIsInstance(test_network.transformer_layers, list)
  self.assertLen(test_network.transformer_layers, num_blocks)

  layer_output_shape = [None, sequence_length, hidden_size]
  self.assertAllEqual(layer_output.shape.as_list(), layer_output_shape)
  pooler_output_shape = [None, hidden_size]
  self.assertAllEqual(pooler_output.shape.as_list(), pooler_output_shape)
  self.assertAllEqual(tf.float32, layer_output.dtype)

# Variant of the attention-score test above, written against the older
# tuple-output API (`return_all_layers` / `return_attention_score`) rather
# than the dict outputs used by the earlier tests.
def test_mobilebert_encoder_invocation_with_attention_score(self):
  vocab_size = 100
  hidden_size = 32
  sequence_length = 16
  num_blocks = 3
  test_network = mobile_bert_encoder.MobileBERTEncoder(
      word_vocab_size=vocab_size,
      hidden_size=hidden_size,
      num_blocks=num_blocks,
      return_all_layers=False,
      return_attention_score=True)

  word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  layer_out_tensor, pooler_out_tensor, attention_out_tensor = test_network(
      [word_ids, mask, type_ids])
  model = tf.keras.Model(
      [word_ids, mask, type_ids],
      [layer_out_tensor, pooler_out_tensor, attention_out_tensor])

  # Uses the module-level `generate_fake_input` helper for consistency with
  # the other tests (the original called `utils.generate_fake_input`).
  input_seq = generate_fake_input(
      batch_size=1, seq_len=sequence_length, vocab_size=vocab_size)
  input_mask = generate_fake_input(
      batch_size=1, seq_len=sequence_length, vocab_size=2)
  token_type = generate_fake_input(
      batch_size=1, seq_len=sequence_length, vocab_size=2)
  _, _, attention_score_output = model.predict(
      [input_seq, input_mask, token_type])
  self.assertLen(attention_score_output, num_blocks)

# Variant of the invocation test above, written against the older tuple-output
# API (`return_all_layers=False` yields (sequence_output, pooled_output)).
def test_mobilebert_encoder_invocation(self):
  vocab_size = 100
  hidden_size = 32
  sequence_length = 16
  num_blocks = 3
  test_network = mobile_bert_encoder.MobileBERTEncoder(
      word_vocab_size=vocab_size,
      hidden_size=hidden_size,
      num_blocks=num_blocks,
      return_all_layers=False)

  word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  layer_out_tensor, pooler_out_tensor = test_network(
      [word_ids, mask, type_ids])
  model = tf.keras.Model(
      [word_ids, mask, type_ids], [layer_out_tensor, pooler_out_tensor])

  input_seq = generate_fake_input(
      batch_size=1, seq_len=sequence_length, vocab_size=vocab_size)
  input_mask = generate_fake_input(
      batch_size=1, seq_len=sequence_length, vocab_size=2)
  token_type = generate_fake_input(
      batch_size=1, seq_len=sequence_length, vocab_size=2)
  layer_output, pooler_output = model.predict(
      [input_seq, input_mask, token_type])

  layer_output_shape = [1, sequence_length, hidden_size]
  self.assertAllEqual(layer_output.shape, layer_output_shape)
  pooler_output_shape = [1, hidden_size]
  self.assertAllEqual(pooler_output.shape, pooler_output_shape)

def test_layer_invocation_with_external_logits(self):
  vocab_size = 100
  sequence_length = 32
  hidden_size = 64
  embedding_width = 32
  num_predictions = 21
  xformer_stack = mobile_bert_encoder.MobileBERTEncoder(
      word_vocab_size=vocab_size,
      num_blocks=1,
      hidden_size=hidden_size,
      num_attention_heads=4,
      word_embed_size=embedding_width)
  test_layer = self.create_layer(
      vocab_size=vocab_size,
      hidden_size=hidden_size,
      embedding_width=embedding_width,
      xformer_stack=xformer_stack,
      output='predictions')
  logit_layer = self.create_layer(
      vocab_size=vocab_size,
      hidden_size=hidden_size,
      embedding_width=embedding_width,
      xformer_stack=xformer_stack,
      output='logits')

  # Create a model from the masked LM layer.
  lm_input_tensor = tf.keras.Input(shape=(sequence_length, hidden_size))
  masked_positions = tf.keras.Input(shape=(num_predictions,), dtype=tf.int32)
  output = test_layer(lm_input_tensor, masked_positions)
  logit_output = logit_layer(lm_input_tensor, masked_positions)
  logit_output = tf.keras.layers.Activation(tf.nn.log_softmax)(logit_output)
  logit_layer.set_weights(test_layer.get_weights())
  model = tf.keras.Model([lm_input_tensor, masked_positions], output)
  logits_model = tf.keras.Model([lm_input_tensor, masked_positions],
                                logit_output)

  # Invoke the masked LM on some fake data to make sure there are no
  # runtime errors in the code.
  batch_size = 3
  lm_input_data = 10 * np.random.random_sample(
      (batch_size, sequence_length, hidden_size))
  masked_position_data = np.random.randint(
      sequence_length, size=(batch_size, num_predictions))
  # The models are called directly rather than through `predict`:
  # ref_outputs = model.predict([lm_input_data, masked_position_data])
  # outputs = logits_model.predict([lm_input_data, masked_position_data])
  ref_outputs = model([lm_input_data, masked_position_data])
  outputs = logits_model([lm_input_data, masked_position_data])

  # Ensure that the tensor shapes are correct.
  expected_output_shape = (batch_size, num_predictions, vocab_size)
  self.assertEqual(expected_output_shape, ref_outputs.shape)
  self.assertEqual(expected_output_shape, outputs.shape)
  self.assertAllClose(ref_outputs, outputs)

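# `test_mobilebert_encoder_for_downstream_task` below receives `task` and
# `prediction_shape` as test parameters, implying a decorator along these
# lines. The task classes and expected shapes here are illustrative
# assumptions based on the Model Garden's classifier models, not taken from
# the original: BertClassifier predicts [batch, num_classes] and
# BertTokenClassifier predicts [batch, seq_len, num_classes].
@parameterized.named_parameters(
    ('sequence_classification', models.BertClassifier, [None, 5]),
    ('token_classification', models.BertTokenClassifier, [None, 16, 5]))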
def test_mobilebert_encoder_for_downstream_task(self, task, prediction_shape):
  hidden_size = 32
  sequence_length = 16
  mobilebert_encoder = mobile_bert_encoder.MobileBERTEncoder(
      word_vocab_size=100, hidden_size=hidden_size)
  num_classes = 5
  classifier = task(network=mobilebert_encoder, num_classes=num_classes)

  word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  prediction = classifier([word_ids, mask, type_ids])
  self.assertAllEqual(prediction.shape.as_list(), prediction_shape)

def test_mobilebert_encoder_return_all_layer_output(self):
  hidden_size = 32
  sequence_length = 16
  num_blocks = 3
  test_network = mobile_bert_encoder.MobileBERTEncoder(
      word_vocab_size=100, hidden_size=hidden_size, num_blocks=num_blocks)

  word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  outputs = test_network([word_ids, mask, type_ids])

  all_layer_output = outputs['encoder_outputs']
  self.assertIsInstance(all_layer_output, list)
  # The embedding output plus one output per transformer block.
  self.assertLen(all_layer_output, num_blocks + 1)

def create_layer(self,
                 vocab_size,
                 hidden_size,
                 embedding_width,
                 output='predictions',
                 xformer_stack=None):
  # First, create a transformer stack that we can use to get the LM's
  # vocabulary embedding table.
  if xformer_stack is None:
    xformer_stack = mobile_bert_encoder.MobileBERTEncoder(
        word_vocab_size=vocab_size,
        num_blocks=1,
        hidden_size=hidden_size,
        num_attention_heads=4,
        word_embed_size=embedding_width)

  # Create a masked LM layer from the transformer stack's embedding table.
  test_layer = mobile_bert_layers.MobileBertMaskedLM(
      embedding_table=xformer_stack.get_embedding_table(),
      output=output)
  return test_layer

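# Several tests above call `generate_fake_input`, which is not defined in
# this excerpt. A minimal sketch of what it needs to provide: integer token
# IDs of shape [batch_size, seq_len] drawn from [0, vocab_size). The `seed`
# parameter and all defaults are assumptions added for reproducibility.
def generate_fake_input(batch_size=1, seq_len=5, vocab_size=10000, seed=0):
  """Generates a random integer array of shape [batch_size, seq_len]."""
  np.random.seed(seed)
  return np.random.randint(vocab_size, size=(batch_size, seq_len))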