def create_and_check_t5_model(self, config, input_ids, input_mask, token_labels):
    model = TFT5Model(config=config)
    inputs = {
        "input_ids": input_ids,
        "decoder_input_ids": input_ids,
        "decoder_attention_mask": input_mask,
    }
    result = model(inputs)

    result = model(input_ids, decoder_attention_mask=input_mask, decoder_input_ids=input_ids)
    decoder_output = result.last_hidden_state
    decoder_past = result.decoder_past_key_values
    encoder_output = result.encoder_last_hidden_state

    self.parent.assertListEqual(
        list(encoder_output.shape), [self.batch_size, self.seq_length, self.hidden_size]
    )
    self.parent.assertListEqual(
        list(decoder_output.shape), [self.batch_size, self.seq_length, self.hidden_size]
    )
    self.parent.assertEqual(len(decoder_past), 2)
    # decoder_past[0] should correspond to the encoder output
    self.parent.assertTrue(tf.reduce_all(tf.math.equal(decoder_past[0][0], encoder_output)))
    # There should be `num_layers` key/value embeddings stored in decoder_past[1]
    self.parent.assertEqual(len(decoder_past[1]), config.num_layers)
    # Each decoder_past[1] tuple should hold a self-attn key, a self-attn value,
    # a cross-attn key and a cross-attn value
    self.parent.assertEqual(len(decoder_past[1][0]), 4)
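# A minimal standalone sketch (not part of the test class above) of the
# encoder-decoder forward call these assertions exercise. It assumes
# `transformers` and `tensorflow` are installed; "t5-small" and the toy
# token ids are illustrative only.
import tensorflow as tf
from transformers import TFT5Model

def example_t5_forward():
    model = TFT5Model.from_pretrained("t5-small")
    input_ids = tf.constant([[37, 423, 215, 1]])  # toy token ids, 1 == </s>
    outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)
    # last_hidden_state has shape (batch, seq_len, d_model)
    return outputs.last_hidden_state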
def create_and_check_t5_decoder_model_past_large_inputs(
    self, config, input_ids, decoder_input_ids, attention_mask
):
    model = TFT5Model(config=config).get_decoder()

    input_ids = input_ids[:1, :]
    attention_mask = attention_mask[:1, :]
    self.batch_size = 1

    # first forward pass
    outputs = model(input_ids, attention_mask=attention_mask, use_cache=True)

    # create hypothetical next tokens and extend into next_input_ids
    next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
    next_attn_mask = ids_tensor((self.batch_size, 3), 2)

    # append to next_input_ids and next_attention_mask
    next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
    next_attention_mask = tf.concat([attention_mask, next_attn_mask], axis=-1)

    output_from_no_past = model(next_input_ids, attention_mask=next_attention_mask)[0]
    output_from_past = model(
        next_tokens, attention_mask=next_attention_mask, past_key_values=outputs.past_key_values
    )[0]

    self.parent.assertEqual(next_tokens.shape[1], output_from_past.shape[1])

    # select a random slice
    random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
    output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx]
    output_from_past_slice = output_from_past[:, :, random_slice_idx]

    # test that the outputs are equal for the slice
    tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
def create_and_check_t5_decoder_model_past(self, config, input_ids, decoder_input_ids, attention_mask):
    model = TFT5Model(config=config).get_decoder()

    input_ids = input_ids[:1, :]
    self.batch_size = 1

    # first forward pass
    outputs = model(input_ids, use_cache=True)
    outputs_use_cache_conf = model(input_ids)
    outputs_no_past = model(input_ids, use_cache=False)

    self.parent.assertTrue(len(outputs) == len(outputs_use_cache_conf))
    self.parent.assertTrue(len(outputs) == len(outputs_no_past) + 1)

    # create a hypothetical next token and extend into next_input_ids
    next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

    # append to next_input_ids
    next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)

    output_from_no_past = model(next_input_ids)[0]
    output_from_past = model(next_tokens, past_key_values=outputs.past_key_values)[0]

    # select a random slice
    random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
    output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
    output_from_past_slice = output_from_past[:, 0, random_slice_idx]

    # test that the outputs are equal for the slice
    tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
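# A hedged sketch of the cached-decoding pattern the two tests above verify:
# run the prefix once with use_cache=True, then feed only the new token plus
# past_key_values, and check that it matches the uncached full forward pass.
# The function name, toy ids, and "t5-small" checkpoint are illustrative
# assumptions, not part of the original test suite.
import tensorflow as tf
from transformers import TFT5Model

def example_cached_decoding_step():
    decoder = TFT5Model.from_pretrained("t5-small").get_decoder()
    prefix = tf.constant([[37, 423, 215]])     # toy decoder prefix
    outputs = decoder(prefix, use_cache=True)  # primes the key/value cache
    next_token = tf.constant([[1]])
    from_past = decoder(next_token, past_key_values=outputs.past_key_values)[0]
    from_full = decoder(tf.concat([prefix, next_token], axis=-1))[0]
    # the cached single-step output should match the last uncached position
    tf.debugging.assert_near(from_past[:, 0], from_full[:, -1], rtol=1e-3)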
def __init__(
        self,
        pretrained_model_name_or_path='t5-small',
        reduce_output='sum',
        trainable=True,
        num_tokens=None,
        **kwargs
):
    super(T5Encoder, self).__init__()
    try:
        from transformers import TFT5Model
    except ModuleNotFoundError:
        logger.error(
            'transformers is not installed. '
            'In order to install all text feature dependencies run '
            'pip install ludwig[text]'
        )
        sys.exit(-1)

    self.transformer = TFT5Model.from_pretrained(
        pretrained_model_name_or_path
    )
    self.reduce_output = reduce_output
    self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
    self.transformer.trainable = trainable
    self.transformer.resize_token_embeddings(num_tokens)
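# Hypothetical construction of the Ludwig T5Encoder defined above. The
# argument values are assumptions for illustration; in particular num_tokens
# must be a real vocabulary size, since __init__ resizes the token embeddings
# with it.
encoder = T5Encoder(
    pretrained_model_name_or_path='t5-small',
    reduce_output='sum',
    trainable=False,
    num_tokens=32100,  # illustrative; pass your tokenizer's vocab size
)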
def create_and_check_t5_decoder_model_attention_mask_past(
        self, config, input_ids, decoder_input_ids, attention_mask):
    model = TFT5Model(config=config).get_decoder()

    # create an attention mask: ones for the first half, zeros for the second
    half_seq_length = self.seq_length // 2
    attn_mask_begin = tf.ones((self.batch_size, half_seq_length), dtype=tf.int32)
    attn_mask_end = tf.zeros(
        (self.batch_size, self.seq_length - half_seq_length), dtype=tf.int32)
    attn_mask = tf.concat([attn_mask_begin, attn_mask_end], axis=1)

    # first forward pass
    _, past_key_value_states = model(input_ids, attention_mask=attn_mask, use_cache=True)

    # create a hypothetical next token and extend into next_input_ids
    next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

    # change a random masked slice of input_ids
    random_seq_idx_to_change = ids_tensor(
        (1, ), half_seq_length).numpy() + 1
    random_other_next_tokens = ids_tensor(
        (self.batch_size, self.seq_length), config.vocab_size)
    vector_condition = tf.range(
        self.seq_length) == (self.seq_length - random_seq_idx_to_change)
    condition = tf.transpose(
        tf.broadcast_to(tf.expand_dims(vector_condition, -1),
                        (self.seq_length, self.batch_size)))
    input_ids = tf.where(condition, random_other_next_tokens, input_ids)

    # append to next_input_ids and attn_mask
    next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
    attn_mask = tf.concat(
        [attn_mask, tf.ones((attn_mask.shape[0], 1), dtype=tf.int32)],
        axis=1,
    )

    # get two different outputs
    output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0]
    output_from_past = model(
        next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask)[0]

    # select a random slice
    random_slice_idx = ids_tensor(
        (1, ), output_from_past.shape[-1]).numpy().item()
    output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
    output_from_past_slice = output_from_past[:, 0, random_slice_idx]

    # test that the outputs are equal for the slice
    tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
def _test_TFT5Model(self, size, large=False):
    from transformers import T5Tokenizer, TFT5Model
    tokenizer = T5Tokenizer.from_pretrained(size)
    model = TFT5Model.from_pretrained(size)
    input_ids = tokenizer(
        "Studies have been shown that owning a dog is good for you", return_tensors="tf"
    ).input_ids
    decoder_input_ids = tokenizer("Studies show that", return_tensors="tf").input_ids
    input_dict = {"input_ids": input_ids, "decoder_input_ids": decoder_input_ids}
    spec, input_dict = self.spec_and_pad(input_dict)
    outputs = ["last_hidden_state"]
    self.run_test(model, input_dict, input_signature=spec, outputs=outputs, large=large)
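# A hedged sketch of the plain TensorFlow inference that the conversion test
# above wraps, without the tf2onnx harness (spec_and_pad / run_test). It
# assumes network access to download the "t5-small" checkpoint; the function
# name is illustrative.
from transformers import T5Tokenizer, TFT5Model

def example_t5_inference():
    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    model = TFT5Model.from_pretrained("t5-small")
    enc_ids = tokenizer(
        "Studies have been shown that owning a dog is good for you", return_tensors="tf"
    ).input_ids
    dec_ids = tokenizer("Studies show that", return_tensors="tf").input_ids
    outputs = model(input_ids=enc_ids, decoder_input_ids=dec_ids)
    return outputs.last_hidden_state  # shape: (1, decoder_len, d_model)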
def create_and_check_t5_model(self, config, input_ids, input_mask, token_labels):
    model = TFT5Model(config=config)
    inputs = {
        "inputs": input_ids,
        "decoder_input_ids": input_ids,
        "decoder_attention_mask": input_mask,
    }
    encoder_output, decoder_output = model(inputs)

    encoder_output, decoder_output = model(
        input_ids, decoder_attention_mask=input_mask, decoder_input_ids=input_ids
    )

    result = {
        "encoder_output": encoder_output.numpy(),
        "decoder_output": decoder_output.numpy(),
    }
    self.parent.assertListEqual(
        list(result["encoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
    )
    self.parent.assertListEqual(
        list(result["decoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
    )
def test_model_from_pretrained(self):
    for model_name in ["t5-small"]:
        model = TFT5Model.from_pretrained(model_name)
        self.assertIsNotNone(model)
def test_model_from_pretrained(self):
    model = TFT5Model.from_pretrained("t5-small")
    self.assertIsNotNone(model)
def test_model_from_pretrained(self):
    for model_name in ["t5-small"]:
        model = TFT5Model.from_pretrained(model_name, cache_dir=CACHE_DIR)
        self.assertIsNotNone(model)
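# A hedged sketch of the shared pattern in the three variants above: load a
# pretrained checkpoint and assert the model object exists. cache_dir is
# optional; CACHE_DIR in the last variant is defined elsewhere in its test
# module, and the path below is only an illustrative stand-in.
from transformers import TFT5Model

model = TFT5Model.from_pretrained("t5-small", cache_dir="/tmp/hf_cache")
assert model is not None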